blob: 2494c1eb0570c8894e5bc7e10e957eb0c5a21183 [file] [log] [blame]
Mike Klein894d5612017-03-07 07:59:52 -05001/*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8// This file is generated semi-automatically with this command:
9// $ src/jumper/build_stages.py
10
11#include <stdint.h>
12
// CODE is the storage prefix used by every array in this file: it gives the
// array C linkage and asks the toolchain to place it in a code section
// (a "code" section declared read,execute on MSVC, __TEXT,__text on Mach-O,
// and .text everywhere else).
13#if defined(_MSC_VER)
14 #pragma section("code", read,execute)
15 #define CODE extern "C" __declspec(allocate("code"))
16#elif defined(__MACH__)
17 #define CODE extern "C" __attribute__((section("__TEXT,__text")))
18#else
Mike Klein30115682017-03-08 19:10:44 +000019 #define CODE extern "C" __attribute__((section(".text")))
Mike Klein894d5612017-03-07 07:59:52 -050020#endif
21
22#if defined(__aarch64__)
23
// Driver loop. Saves x19-x23 and x30, loads a code pointer from [x1] into x23
// (advancing the saved copy of x1 by 8), and keeps x0, x2 and x3 in x21, x22
// and x19. While x21 + 4 <= x19 (unsigned compare), it zeroes v0-v7, calls x23
// with x0 = x21, x1 = x20, x2 = x22, then advances x21 by 4. On exit x0 holds
// the first index that was not processed, and the saved registers are restored.
24CODE const uint32_t sk_start_pipeline_aarch64[] = {
25 0xa9bd5bf7, //stp x23, x22, [sp, #-48]!
26 0xa90153f5, //stp x21, x20, [sp, #16]
27 0xa9027bf3, //stp x19, x30, [sp, #32]
Mike Klein64b97482017-03-14 17:35:04 -070028 0xaa0103f4, //mov x20, x1
29 0xf8408697, //ldr x23, [x20], #8
30 0xaa0003f5, //mov x21, x0
Mike Klein894d5612017-03-07 07:59:52 -050031 0xaa0303f3, //mov x19, x3
Mike Klein64b97482017-03-14 17:35:04 -070032 0x910012a8, //add x8, x21, #0x4
Mike Klein894d5612017-03-07 07:59:52 -050033 0xeb13011f, //cmp x8, x19
Mike Klein64b97482017-03-14 17:35:04 -070034 0xaa0203f6, //mov x22, x2
Mike Klein894d5612017-03-07 07:59:52 -050035 0x54000069, //b.ls 34 <sk_start_pipeline_aarch64+0x34> // b.plast
Mike Klein64b97482017-03-14 17:35:04 -070036 0xaa1503e0, //mov x0, x21
Mike Klein894d5612017-03-07 07:59:52 -050037 0x14000012, //b 78 <sk_start_pipeline_aarch64+0x78>
38 0x6f00e400, //movi v0.2d, #0x0
39 0x6f00e401, //movi v1.2d, #0x0
40 0x6f00e402, //movi v2.2d, #0x0
41 0x6f00e403, //movi v3.2d, #0x0
42 0x6f00e404, //movi v4.2d, #0x0
43 0x6f00e405, //movi v5.2d, #0x0
44 0x6f00e406, //movi v6.2d, #0x0
45 0x6f00e407, //movi v7.2d, #0x0
Mike Klein64b97482017-03-14 17:35:04 -070046 0xaa1503e0, //mov x0, x21
47 0xaa1403e1, //mov x1, x20
48 0xaa1603e2, //mov x2, x22
Mike Klein894d5612017-03-07 07:59:52 -050049 0xd63f02e0, //blr x23
Mike Klein64b97482017-03-14 17:35:04 -070050 0x910012a0, //add x0, x21, #0x4
51 0x910022a8, //add x8, x21, #0x8
Mike Klein894d5612017-03-07 07:59:52 -050052 0xeb13011f, //cmp x8, x19
Mike Klein64b97482017-03-14 17:35:04 -070053 0xaa0003f5, //mov x21, x0
Mike Klein894d5612017-03-07 07:59:52 -050054 0x54fffe09, //b.ls 34 <sk_start_pipeline_aarch64+0x34> // b.plast
55 0xa9427bf3, //ldp x19, x30, [sp, #32]
56 0xa94153f5, //ldp x21, x20, [sp, #16]
57 0xa8c35bf7, //ldp x23, x22, [sp], #48
58 0xd65f03c0, //ret
59};
60
// A single `ret`: unlike the other stages it does not branch onward through
// x3, it returns to the caller (presumably used to terminate a stage chain).
61CODE const uint32_t sk_just_return_aarch64[] = {
62 0xd65f03c0, //ret
63};
64
// Loads x8 and the next code pointer x3 from [x1] (x1 += 16) and q6 from [x2].
// v0 = float(x0 broadcast) + 0.5 + the four floats at [x2];
// v1 = float(32-bit value broadcast from [x8]) + 0.5 (0.5 is 0x3f000000);
// v2 = 1.0; v3-v7 = 0. Then branches to x3.
65CODE const uint32_t sk_seed_shader_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050066 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Klein5224f462017-03-07 17:29:54 -050067 0x3dc00046, //ldr q6, [x2]
Mike Klein894d5612017-03-07 07:59:52 -050068 0x4e040c00, //dup v0.4s, w0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050069 0x4f0167e7, //movi v7.4s, #0x3f, lsl #24
Mike Klein894d5612017-03-07 07:59:52 -050070 0x4d40c901, //ld1r {v1.4s}, [x8]
Mike Klein894d5612017-03-07 07:59:52 -050071 0x4e21d800, //scvtf v0.4s, v0.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050072 0x4e27d400, //fadd v0.4s, v0.4s, v7.4s
73 0x4f03f602, //fmov v2.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -050074 0x4e21d821, //scvtf v1.4s, v1.4s
Mike Klein894d5612017-03-07 07:59:52 -050075 0x6f00e403, //movi v3.2d, #0x0
76 0x6f00e404, //movi v4.2d, #0x0
77 0x6f00e405, //movi v5.2d, #0x0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050078 0x4e26d400, //fadd v0.4s, v0.4s, v6.4s
Mike Klein894d5612017-03-07 07:59:52 -050079 0x6f00e406, //movi v6.2d, #0x0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050080 0x4e27d421, //fadd v1.4s, v1.4s, v7.4s
Mike Klein894d5612017-03-07 07:59:52 -050081 0x6f00e407, //movi v7.2d, #0x0
Mike Klein894d5612017-03-07 07:59:52 -050082 0xd61f0060, //br x3
83};
84
// Loads 16 bytes from the pointer in x8 (taken from [x1]) and broadcasts its
// four 32-bit lanes into v0, v1, v2 and v3 respectively, then branches to x3.
85CODE const uint32_t sk_constant_color_aarch64[] = {
86 0xa8c10c28, //ldp x8, x3, [x1], #16
87 0x3dc00103, //ldr q3, [x8]
88 0x4e040460, //dup v0.4s, v3.s[0]
89 0x4e0c0461, //dup v1.4s, v3.s[1]
90 0x4e140462, //dup v2.4s, v3.s[2]
91 0x4e1c0463, //dup v3.4s, v3.s[3]
92 0xd61f0060, //br x3
93};
94
// Zeroes v0-v3, then branches to the code pointer loaded from [x1] (x1 += 8).
95CODE const uint32_t sk_clear_aarch64[] = {
96 0xf8408423, //ldr x3, [x1], #8
97 0x6f00e400, //movi v0.2d, #0x0
98 0x6f00e401, //movi v1.2d, #0x0
99 0x6f00e402, //movi v2.2d, #0x0
100 0x6f00e403, //movi v3.2d, #0x0
101 0xd61f0060, //br x3
102};
103
// Lane-wise float add: v0 += v4, v1 += v5, v2 += v6, v3 += v7.
// Then branches to the code pointer loaded from [x1] (x1 += 8).
104CODE const uint32_t sk_plus__aarch64[] = {
105 0xf8408423, //ldr x3, [x1], #8
106 0x4e24d400, //fadd v0.4s, v0.4s, v4.4s
107 0x4e25d421, //fadd v1.4s, v1.4s, v5.4s
108 0x4e26d442, //fadd v2.4s, v2.4s, v6.4s
109 0x4e27d463, //fadd v3.4s, v3.4s, v7.4s
110 0xd61f0060, //br x3
111};
112
// With t = 1.0 - v3 (computed before v3 is updated):
// v0 += t*v4, v1 += t*v5, v2 += t*v6, v3 += t*v7. Then branches to x3.
113CODE const uint32_t sk_srcover_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500114 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500115 0x4f03f610, //fmov v16.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500116 0x4ea3d610, //fsub v16.4s, v16.4s, v3.4s
117 0x4e24ce00, //fmla v0.4s, v16.4s, v4.4s
118 0x4e25ce01, //fmla v1.4s, v16.4s, v5.4s
119 0x4e26ce02, //fmla v2.4s, v16.4s, v6.4s
120 0x4e27ce03, //fmla v3.4s, v16.4s, v7.4s
121 0xd61f0060, //br x3
122};
123
// With t = 1.0 - v7: v0 = v4 + t*v0, v1 = v5 + t*v1, v2 = v6 + t*v2,
// v3 = v7 + t*v3. The sums are built in v16-v19 and copied back so v4-v7
// are left unchanged. Then branches to x3.
124CODE const uint32_t sk_dstover_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500125 0x4f03f611, //fmov v17.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500126 0xf8408423, //ldr x3, [x1], #8
127 0x4ea41c90, //mov v16.16b, v4.16b
Mike Klein894d5612017-03-07 07:59:52 -0500128 0x4ea7d634, //fsub v20.4s, v17.4s, v7.4s
129 0x4ea51cb1, //mov v17.16b, v5.16b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500130 0x4ea61cd2, //mov v18.16b, v6.16b
Mike Klein894d5612017-03-07 07:59:52 -0500131 0x4ea71cf3, //mov v19.16b, v7.16b
132 0x4e20ce90, //fmla v16.4s, v20.4s, v0.4s
133 0x4e21ce91, //fmla v17.4s, v20.4s, v1.4s
134 0x4e22ce92, //fmla v18.4s, v20.4s, v2.4s
135 0x4e23ce93, //fmla v19.4s, v20.4s, v3.4s
136 0x4eb01e00, //mov v0.16b, v16.16b
137 0x4eb11e21, //mov v1.16b, v17.16b
138 0x4eb21e42, //mov v2.16b, v18.16b
139 0x4eb31e63, //mov v3.16b, v19.16b
140 0xd61f0060, //br x3
141};
142
// Clamps from below: v0-v3 = fmax(v0-v3, 0). Then branches to x3.
143CODE const uint32_t sk_clamp_0_aarch64[] = {
144 0xf8408423, //ldr x3, [x1], #8
145 0x6f00e410, //movi v16.2d, #0x0
146 0x4e30f400, //fmax v0.4s, v0.4s, v16.4s
147 0x4e30f421, //fmax v1.4s, v1.4s, v16.4s
148 0x4e30f442, //fmax v2.4s, v2.4s, v16.4s
149 0x4e30f463, //fmax v3.4s, v3.4s, v16.4s
150 0xd61f0060, //br x3
151};
152
// Clamps from above: v0-v3 = fmin(v0-v3, 1.0). Then branches to x3.
153CODE const uint32_t sk_clamp_1_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500154 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500155 0x4f03f610, //fmov v16.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500156 0x4eb0f400, //fmin v0.4s, v0.4s, v16.4s
157 0x4eb0f421, //fmin v1.4s, v1.4s, v16.4s
158 0x4eb0f442, //fmin v2.4s, v2.4s, v16.4s
159 0x4eb0f463, //fmin v3.4s, v3.4s, v16.4s
160 0xd61f0060, //br x3
161};
162
// First v3 = fmin(v3, 1.0), then v0, v1, v2 are each clamped from above by
// the updated v3. Then branches to x3.
163CODE const uint32_t sk_clamp_a_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500164 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500165 0x4f03f610, //fmov v16.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500166 0x4eb0f463, //fmin v3.4s, v3.4s, v16.4s
167 0x4ea3f400, //fmin v0.4s, v0.4s, v3.4s
168 0x4ea3f421, //fmin v1.4s, v1.4s, v3.4s
169 0x4ea3f442, //fmin v2.4s, v2.4s, v3.4s
170 0xd61f0060, //br x3
171};
172
// Broadcasts three consecutive 32-bit values from the pointer in x8 (taken
// from [x1]): [x8] -> v0, [x8+4] -> v1, [x8+8] -> v2. v3 is not touched.
// Then branches to x3.
173CODE const uint32_t sk_set_rgb_aarch64[] = {
174 0xa8c10c28, //ldp x8, x3, [x1], #16
175 0xaa0803e9, //mov x9, x8
176 0x4ddfc920, //ld1r {v0.4s}, [x9], #4
177 0x91002108, //add x8, x8, #0x8
178 0x4d40c902, //ld1r {v2.4s}, [x8]
179 0x4d40c921, //ld1r {v1.4s}, [x9]
180 0xd61f0060, //br x3
181};
182
// Exchanges v0 and v2 (using v16 as scratch), then branches to x3.
183CODE const uint32_t sk_swap_rb_aarch64[] = {
184 0xf8408423, //ldr x3, [x1], #8
185 0x4ea01c10, //mov v16.16b, v0.16b
186 0x4ea21c40, //mov v0.16b, v2.16b
187 0x4eb01e02, //mov v2.16b, v16.16b
188 0xd61f0060, //br x3
189};
190
// Exchanges v0-v3 with v4-v7 (v0<->v4, v1<->v5, v2<->v6, v3<->v7) using
// v16-v19 as scratch, then branches to x3.
191CODE const uint32_t sk_swap_aarch64[] = {
192 0xf8408423, //ldr x3, [x1], #8
193 0x4ea31c70, //mov v16.16b, v3.16b
194 0x4ea21c51, //mov v17.16b, v2.16b
195 0x4ea11c32, //mov v18.16b, v1.16b
196 0x4ea01c13, //mov v19.16b, v0.16b
197 0x4ea41c80, //mov v0.16b, v4.16b
198 0x4ea51ca1, //mov v1.16b, v5.16b
199 0x4ea61cc2, //mov v2.16b, v6.16b
200 0x4ea71ce3, //mov v3.16b, v7.16b
201 0x4eb31e64, //mov v4.16b, v19.16b
202 0x4eb21e45, //mov v5.16b, v18.16b
203 0x4eb11e26, //mov v6.16b, v17.16b
204 0x4eb01e07, //mov v7.16b, v16.16b
205 0xd61f0060, //br x3
206};
207
// Copies v0-v3 into v4-v7 (v0-v3 keep their values), then branches to x3.
208CODE const uint32_t sk_move_src_dst_aarch64[] = {
209 0xf8408423, //ldr x3, [x1], #8
210 0x4ea01c04, //mov v4.16b, v0.16b
211 0x4ea11c25, //mov v5.16b, v1.16b
212 0x4ea21c46, //mov v6.16b, v2.16b
213 0x4ea31c67, //mov v7.16b, v3.16b
214 0xd61f0060, //br x3
215};
216
// Copies v4-v7 into v0-v3 (v4-v7 keep their values), then branches to x3.
217CODE const uint32_t sk_move_dst_src_aarch64[] = {
218 0xf8408423, //ldr x3, [x1], #8
219 0x4ea41c80, //mov v0.16b, v4.16b
220 0x4ea51ca1, //mov v1.16b, v5.16b
221 0x4ea61cc2, //mov v2.16b, v6.16b
222 0x4ea71ce3, //mov v3.16b, v7.16b
223 0xd61f0060, //br x3
224};
225
// Multiplies v0, v1 and v2 by v3 lane-wise; v3 is unchanged.
// Then branches to x3.
226CODE const uint32_t sk_premul_aarch64[] = {
227 0xf8408423, //ldr x3, [x1], #8
228 0x6e23dc00, //fmul v0.4s, v0.4s, v3.4s
229 0x6e23dc21, //fmul v1.4s, v1.4s, v3.4s
230 0x6e23dc42, //fmul v2.4s, v2.4s, v3.4s
231 0xd61f0060, //br x3
232};
233
// Computes scale = 1.0 / v3 per lane, forced to 0 in lanes where v3 == 0.0
// (the fcmeq mask is cleared out of the quotient with bic), then multiplies
// v0, v1 and v2 by scale. v3 is unchanged. Then branches to x3.
234CODE const uint32_t sk_unpremul_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500235 0x4f03f611, //fmov v17.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500236 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500237 0x4ea0d870, //fcmeq v16.4s, v3.4s, #0.0
238 0x6e23fe31, //fdiv v17.4s, v17.4s, v3.4s
239 0x4e701e30, //bic v16.16b, v17.16b, v16.16b
Mike Klein894d5612017-03-07 07:59:52 -0500240 0x6e20de00, //fmul v0.4s, v16.4s, v0.4s
241 0x6e21de01, //fmul v1.4s, v16.4s, v1.4s
242 0x6e22de02, //fmul v2.4s, v16.4s, v2.4s
243 0xd61f0060, //br x3
244};
245
// Applies the same piecewise function to each lane of v0, v1 and v2 (v3 is
// untouched). With float constants given as bit patterns
//   A = 0x3e99999a, B = 0x3f328f5c, C = 0x3b23d70a,
//   K = 0x3d9e8391, T = 0x3d6147ae:
//   x < T (fcmgt T, x) ? x * K : C + (B + A*x) * x*x
// The selection is done with bsl on the comparison mask.
// Presumably this is the sRGB-to-linear transfer function - confirm against
// the generator's source. Then branches to x3.
246CODE const uint32_t sk_from_srgb_aarch64[] = {
Mike Klein5224f462017-03-07 17:29:54 -0500247 0x52a7d328, //mov w8, #0x3e990000
248 0x72933348, //movk w8, #0x999a
249 0x4e040d10, //dup v16.4s, w8
250 0x52a7e648, //mov w8, #0x3f320000
251 0x7291eb88, //movk w8, #0x8f5c
252 0x4e040d11, //dup v17.4s, w8
253 0x52a76468, //mov w8, #0x3b230000
254 0x729ae148, //movk w8, #0xd70a
255 0x4e040d12, //dup v18.4s, w8
256 0x52a7b3c8, //mov w8, #0x3d9e0000
257 0x72907228, //movk w8, #0x8391
Mike Klein894d5612017-03-07 07:59:52 -0500258 0x6e22dc54, //fmul v20.4s, v2.4s, v2.4s
Mike Klein5224f462017-03-07 17:29:54 -0500259 0x4eb11e35, //mov v21.16b, v17.16b
260 0x4eb11e37, //mov v23.16b, v17.16b
261 0x4e22ce11, //fmla v17.4s, v16.4s, v2.4s
262 0x4eb21e56, //mov v22.16b, v18.16b
263 0x4eb21e58, //mov v24.16b, v18.16b
264 0x4e34ce32, //fmla v18.4s, v17.4s, v20.4s
265 0x4e040d11, //dup v17.4s, w8
266 0x52a7ac28, //mov w8, #0x3d610000
267 0x6e20dc13, //fmul v19.4s, v0.4s, v0.4s
268 0x7288f5c8, //movk w8, #0x47ae
269 0x4e20ce15, //fmla v21.4s, v16.4s, v0.4s
Mike Klein894d5612017-03-07 07:59:52 -0500270 0xf8408423, //ldr x3, [x1], #8
271 0x6e21dc34, //fmul v20.4s, v1.4s, v1.4s
Mike Klein5224f462017-03-07 17:29:54 -0500272 0x4e33ceb6, //fmla v22.4s, v21.4s, v19.4s
273 0x4e040d13, //dup v19.4s, w8
274 0x4e21ce17, //fmla v23.4s, v16.4s, v1.4s
275 0x6e31dc15, //fmul v21.4s, v0.4s, v17.4s
276 0x6ea0e660, //fcmgt v0.4s, v19.4s, v0.4s
277 0x6e31dc30, //fmul v16.4s, v1.4s, v17.4s
278 0x6ea1e661, //fcmgt v1.4s, v19.4s, v1.4s
279 0x6e31dc51, //fmul v17.4s, v2.4s, v17.4s
280 0x6ea2e662, //fcmgt v2.4s, v19.4s, v2.4s
Mike Klein894d5612017-03-07 07:59:52 -0500281 0x4e34cef8, //fmla v24.4s, v23.4s, v20.4s
282 0x6e761ea0, //bsl v0.16b, v21.16b, v22.16b
Mike Klein5224f462017-03-07 17:29:54 -0500283 0x6e781e01, //bsl v1.16b, v16.16b, v24.16b
284 0x6e721e22, //bsl v2.16b, v17.16b, v18.16b
Mike Klein894d5612017-03-07 07:59:52 -0500285 0xd61f0060, //br x3
286};
287
// Applies the same piecewise function to each lane of v0, v1 and v2 (v3 is
// untouched). Float constants are given as bit patterns.
//   Lanes where x < 0x3b8ce704 (fcmgt) take x * 0x41475c29.
//   Other lanes take fmin(1.0, 0xbdca57a8 + 0x3f306fce * s + 0x3ed287c2 * q),
//   where s and q come from estimate instructions plus one refinement step:
//     r = frsqrte(x) refined with frsqrts,
//     s = frecpe(r) refined with frecps,
//     q = frsqrte(r) refined with frsqrts.
// The selection is done with bsl on the comparison mask.
// Presumably this is the linear-to-sRGB transfer function with s ~ sqrt(x)
// and q ~ x^(1/4) - confirm against the generator's source.
// Then branches to x3.
288CODE const uint32_t sk_to_srgb_aarch64[] = {
Mike Klein5224f462017-03-07 17:29:54 -0500289 0x52a828e8, //mov w8, #0x41470000
290 0x728b8528, //movk w8, #0x5c29
291 0x4e040d12, //dup v18.4s, w8
292 0x52a7e608, //mov w8, #0x3f300000
293 0x728df9c8, //movk w8, #0x6fce
Mike Klein894d5612017-03-07 07:59:52 -0500294 0x6ea1d811, //frsqrte v17.4s, v0.4s
Mike Klein5224f462017-03-07 17:29:54 -0500295 0x4e040d13, //dup v19.4s, w8
296 0x52b7b948, //mov w8, #0xbdca0000
297 0x728af508, //movk w8, #0x57a8
298 0x6ea1d834, //frsqrte v20.4s, v1.4s
299 0x6e31de36, //fmul v22.4s, v17.4s, v17.4s
300 0x4e040d10, //dup v16.4s, w8
301 0x52a77188, //mov w8, #0x3b8c0000
302 0x6ea1d855, //frsqrte v21.4s, v2.4s
303 0x6e34de98, //fmul v24.4s, v20.4s, v20.4s
304 0x4eb6fc16, //frsqrts v22.4s, v0.4s, v22.4s
305 0x729ce088, //movk w8, #0xe704
Mike Klein894d5612017-03-07 07:59:52 -0500306 0x6e35deb9, //fmul v25.4s, v21.4s, v21.4s
Mike Klein5224f462017-03-07 17:29:54 -0500307 0x4eb8fc38, //frsqrts v24.4s, v1.4s, v24.4s
308 0x6e36de31, //fmul v17.4s, v17.4s, v22.4s
309 0x4e040d17, //dup v23.4s, w8
310 0x4eb9fc59, //frsqrts v25.4s, v2.4s, v25.4s
311 0x6e38de94, //fmul v20.4s, v20.4s, v24.4s
312 0x4ea1da36, //frecpe v22.4s, v17.4s
313 0x6e32dc1a, //fmul v26.4s, v0.4s, v18.4s
314 0x6ea0e6e0, //fcmgt v0.4s, v23.4s, v0.4s
315 0x6e32dc3c, //fmul v28.4s, v1.4s, v18.4s
316 0x6ea1e6e1, //fcmgt v1.4s, v23.4s, v1.4s
317 0x6e32dc52, //fmul v18.4s, v2.4s, v18.4s
318 0x6ea2e6e2, //fcmgt v2.4s, v23.4s, v2.4s
Mike Klein894d5612017-03-07 07:59:52 -0500319 0x6e39deb5, //fmul v21.4s, v21.4s, v25.4s
Mike Klein5224f462017-03-07 17:29:54 -0500320 0x4ea1da97, //frecpe v23.4s, v20.4s
321 0x4e36fe39, //frecps v25.4s, v17.4s, v22.4s
322 0x4ea1dab8, //frecpe v24.4s, v21.4s
323 0x6e39ded6, //fmul v22.4s, v22.4s, v25.4s
324 0x4e37fe99, //frecps v25.4s, v20.4s, v23.4s
325 0x4eb01e1b, //mov v27.16b, v16.16b
326 0x6e39def7, //fmul v23.4s, v23.4s, v25.4s
327 0x4e38feb9, //frecps v25.4s, v21.4s, v24.4s
328 0x6e39df18, //fmul v24.4s, v24.4s, v25.4s
329 0x4eb01e19, //mov v25.16b, v16.16b
330 0x4e36ce7b, //fmla v27.4s, v19.4s, v22.4s
331 0x6ea1da36, //frsqrte v22.4s, v17.4s
332 0x4e37ce79, //fmla v25.4s, v19.4s, v23.4s
333 0x6ea1da97, //frsqrte v23.4s, v20.4s
334 0x4e38ce70, //fmla v16.4s, v19.4s, v24.4s
335 0x6e36ded8, //fmul v24.4s, v22.4s, v22.4s
336 0x6ea1dab3, //frsqrte v19.4s, v21.4s
Mike Klein894d5612017-03-07 07:59:52 -0500337 0x4eb8fe31, //frsqrts v17.4s, v17.4s, v24.4s
Mike Klein5224f462017-03-07 17:29:54 -0500338 0x6e37def8, //fmul v24.4s, v23.4s, v23.4s
339 0x4eb8fe94, //frsqrts v20.4s, v20.4s, v24.4s
340 0x6e33de78, //fmul v24.4s, v19.4s, v19.4s
341 0x52a7da48, //mov w8, #0x3ed20000
Mike Klein894d5612017-03-07 07:59:52 -0500342 0x4eb8feb5, //frsqrts v21.4s, v21.4s, v24.4s
Mike Klein5224f462017-03-07 17:29:54 -0500343 0x7290f848, //movk w8, #0x87c2
344 0x6e31ded1, //fmul v17.4s, v22.4s, v17.4s
345 0x6e34def4, //fmul v20.4s, v23.4s, v20.4s
346 0x6e35de73, //fmul v19.4s, v19.4s, v21.4s
347 0x4e040d15, //dup v21.4s, w8
Mike Klein894d5612017-03-07 07:59:52 -0500348 0xf8408423, //ldr x3, [x1], #8
Mike Klein5224f462017-03-07 17:29:54 -0500349 0x4e31cebb, //fmla v27.4s, v21.4s, v17.4s
350 0x4f03f611, //fmov v17.4s, #1.000000000000000000e+00
351 0x4e34ceb9, //fmla v25.4s, v21.4s, v20.4s
352 0x4e33ceb0, //fmla v16.4s, v21.4s, v19.4s
353 0x4ebbf633, //fmin v19.4s, v17.4s, v27.4s
354 0x4eb9f634, //fmin v20.4s, v17.4s, v25.4s
355 0x4eb0f630, //fmin v16.4s, v17.4s, v16.4s
356 0x6e731f40, //bsl v0.16b, v26.16b, v19.16b
357 0x6e741f81, //bsl v1.16b, v28.16b, v20.16b
358 0x6e701e42, //bsl v2.16b, v18.16b, v16.16b
Mike Klein894d5612017-03-07 07:59:52 -0500359 0xd61f0060, //br x3
360};
361
// Loads one float from the pointer in x8 (taken from [x1]) and multiplies
// every lane of v0-v3 by it. Then branches to x3.
362CODE const uint32_t sk_scale_1_float_aarch64[] = {
363 0xa8c10c28, //ldp x8, x3, [x1], #16
364 0xbd400110, //ldr s16, [x8]
365 0x4f909000, //fmul v0.4s, v0.4s, v16.s[0]
366 0x4f909021, //fmul v1.4s, v1.4s, v16.s[0]
367 0x4f909042, //fmul v2.4s, v2.4s, v16.s[0]
368 0x4f909063, //fmul v3.4s, v3.4s, v16.s[0]
369 0xd61f0060, //br x3
370};
371
// Reads four consecutive bytes at (*x8) + x0, where x8 comes from [x1],
// converts each to float and scales it by the constant with bit pattern
// 0x3b808081 (presumably 1/255). v0-v3 are then multiplied lane-wise by the
// resulting per-lane factor. Then branches to x3.
372CODE const uint32_t sk_scale_u8_aarch64[] = {
373 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500374 0x52a77009, //mov w9, #0x3b800000
375 0x72901029, //movk w9, #0x8081
376 0x4e040d30, //dup v16.4s, w9
Mike Klein894d5612017-03-07 07:59:52 -0500377 0xf9400108, //ldr x8, [x8]
378 0x8b000108, //add x8, x8, x0
379 0x39400109, //ldrb w9, [x8]
380 0x3940050a, //ldrb w10, [x8, #1]
381 0x3940090b, //ldrb w11, [x8, #2]
382 0x39400d08, //ldrb w8, [x8, #3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500383 0x4e021d31, //mov v17.h[0], w9
384 0x4e061d51, //mov v17.h[1], w10
385 0x4e0a1d71, //mov v17.h[2], w11
386 0x4e0e1d11, //mov v17.h[3], w8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500387 0x2f10a631, //uxtl v17.4s, v17.4h
388 0x6e21da31, //ucvtf v17.4s, v17.4s
389 0x6e30de30, //fmul v16.4s, v17.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500390 0x6e20de00, //fmul v0.4s, v16.4s, v0.4s
391 0x6e21de01, //fmul v1.4s, v16.4s, v1.4s
392 0x6e22de02, //fmul v2.4s, v16.4s, v2.4s
393 0x6e23de03, //fmul v3.4s, v16.4s, v3.4s
394 0xd61f0060, //br x3
395};
396
// Loads one float c from the pointer in x8 (taken from [x1]) and computes
// v0 = v4 + (v0 - v4)*c, v1 = v5 + (v1 - v5)*c, v2 = v6 + (v2 - v6)*c,
// v3 = v7 + (v3 - v7)*c. v4-v7 are unchanged. Then branches to x3.
397CODE const uint32_t sk_lerp_1_float_aarch64[] = {
398 0xa8c10c28, //ldp x8, x3, [x1], #16
399 0x4ea4d411, //fsub v17.4s, v0.4s, v4.4s
400 0x4ea41c80, //mov v0.16b, v4.16b
401 0x4ea5d432, //fsub v18.4s, v1.4s, v5.4s
402 0xbd400110, //ldr s16, [x8]
403 0x4ea51ca1, //mov v1.16b, v5.16b
404 0x4f901220, //fmla v0.4s, v17.4s, v16.s[0]
405 0x4ea6d451, //fsub v17.4s, v2.4s, v6.4s
406 0x4f901241, //fmla v1.4s, v18.4s, v16.s[0]
407 0x4ea61cc2, //mov v2.16b, v6.16b
408 0x4ea7d472, //fsub v18.4s, v3.4s, v7.4s
409 0x4ea71ce3, //mov v3.16b, v7.16b
410 0x4f901222, //fmla v2.4s, v17.4s, v16.s[0]
411 0x4f901243, //fmla v3.4s, v18.4s, v16.s[0]
412 0xd61f0060, //br x3
413};
414
// Reads four consecutive bytes at (*x8) + x0, where x8 comes from [x1],
// converts each to float and scales it by the constant with bit pattern
// 0x3b808081 (presumably 1/255), giving a per-lane factor c. Then computes
// v0 = v4 + c*(v0 - v4), v1 = v5 + c*(v1 - v5), v2 = v6 + c*(v2 - v6),
// v3 = v7 + c*(v3 - v7). v4-v7 are unchanged. Then branches to x3.
415CODE const uint32_t sk_lerp_u8_aarch64[] = {
416 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500417 0x52a77009, //mov w9, #0x3b800000
418 0x72901029, //movk w9, #0x8081
419 0x4e040d30, //dup v16.4s, w9
Mike Klein894d5612017-03-07 07:59:52 -0500420 0xf9400108, //ldr x8, [x8]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500421 0x4ea4d412, //fsub v18.4s, v0.4s, v4.4s
Mike Klein894d5612017-03-07 07:59:52 -0500422 0x8b000108, //add x8, x8, x0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500423 0x3940010a, //ldrb w10, [x8]
424 0x39400509, //ldrb w9, [x8, #1]
Mike Klein894d5612017-03-07 07:59:52 -0500425 0x3940090b, //ldrb w11, [x8, #2]
426 0x39400d08, //ldrb w8, [x8, #3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500427 0x4e021d51, //mov v17.h[0], w10
428 0x4e061d31, //mov v17.h[1], w9
429 0x4e0a1d71, //mov v17.h[2], w11
430 0x4e0e1d11, //mov v17.h[3], w8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500431 0x2f10a620, //uxtl v0.4s, v17.4h
Mike Klein894d5612017-03-07 07:59:52 -0500432 0x6e21d800, //ucvtf v0.4s, v0.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500433 0x6e30dc10, //fmul v16.4s, v0.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500434 0x4ea41c80, //mov v0.16b, v4.16b
435 0x4ea5d431, //fsub v17.4s, v1.4s, v5.4s
436 0x4ea51ca1, //mov v1.16b, v5.16b
437 0x4e32ce00, //fmla v0.4s, v16.4s, v18.4s
438 0x4ea6d452, //fsub v18.4s, v2.4s, v6.4s
439 0x4e31ce01, //fmla v1.4s, v16.4s, v17.4s
440 0x4ea61cc2, //mov v2.16b, v6.16b
441 0x4ea7d471, //fsub v17.4s, v3.4s, v7.4s
442 0x4ea71ce3, //mov v3.16b, v7.16b
443 0x4e32ce02, //fmla v2.4s, v16.4s, v18.4s
444 0x4e31ce03, //fmla v3.4s, v16.4s, v17.4s
445 0xd61f0060, //br x3
446};
447
// Loads four 16-bit values p from (*x8) + 2*x0, where x8 comes from [x1], and
// derives three per-lane factors by masking in place (no shift) and scaling
// by float constants given here as bit patterns:
//   c0 = float(p & 0xf800) * 0x37842108
//   c1 = float(p & 0x07e0) * 0x3a020821
//   c2 = float(p & 0x001f) * 0x3d042108
// Then v0 = v4 + c0*(v0 - v4), v1 = v5 + c1*(v1 - v5), v2 = v6 + c2*(v2 - v6)
// and v3 = 1.0. Then branches to x3.
448CODE const uint32_t sk_lerp_565_aarch64[] = {
449 0xa8c10c28, //ldp x8, x3, [x1], #16
450 0xd37ff809, //lsl x9, x0, #1
Mike Klein5224f462017-03-07 17:29:54 -0500451 0x4f072710, //movi v16.4s, #0xf8, lsl #8
452 0x4ea4d413, //fsub v19.4s, v0.4s, v4.4s
Mike Klein894d5612017-03-07 07:59:52 -0500453 0xf9400108, //ldr x8, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500454 0xfc696903, //ldr d3, [x8, x9]
Mike Klein5224f462017-03-07 17:29:54 -0500455 0x52a6f088, //mov w8, #0x37840000
456 0x72842108, //movk w8, #0x2108
457 0x4e040d11, //dup v17.4s, w8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500458 0x2f10a463, //uxtl v3.4s, v3.4h
Mike Klein5224f462017-03-07 17:29:54 -0500459 0x321b17e8, //orr w8, wzr, #0x7e0
460 0x4e301c60, //and v0.16b, v3.16b, v16.16b
461 0x4e040d12, //dup v18.4s, w8
462 0x52a74048, //mov w8, #0x3a020000
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500463 0x4e21d800, //scvtf v0.4s, v0.4s
Mike Klein5224f462017-03-07 17:29:54 -0500464 0x72810428, //movk w8, #0x821
465 0x6e31dc10, //fmul v16.4s, v0.4s, v17.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500466 0x4ea41c80, //mov v0.16b, v4.16b
Mike Klein5224f462017-03-07 17:29:54 -0500467 0x4e33ce00, //fmla v0.4s, v16.4s, v19.4s
468 0x4f0007f0, //movi v16.4s, #0x1f
469 0x4e040d11, //dup v17.4s, w8
470 0x52a7a088, //mov w8, #0x3d040000
471 0x4e321c72, //and v18.16b, v3.16b, v18.16b
472 0x72842108, //movk w8, #0x2108
473 0x4e301c63, //and v3.16b, v3.16b, v16.16b
474 0x4ea6d450, //fsub v16.4s, v2.4s, v6.4s
475 0x4e21da42, //scvtf v2.4s, v18.4s
476 0x6e31dc51, //fmul v17.4s, v2.4s, v17.4s
477 0x4e040d02, //dup v2.4s, w8
478 0x4e21d863, //scvtf v3.4s, v3.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500479 0x4ea5d433, //fsub v19.4s, v1.4s, v5.4s
Mike Klein894d5612017-03-07 07:59:52 -0500480 0x4ea51ca1, //mov v1.16b, v5.16b
Mike Klein5224f462017-03-07 17:29:54 -0500481 0x6e22dc63, //fmul v3.4s, v3.4s, v2.4s
Mike Klein894d5612017-03-07 07:59:52 -0500482 0x4ea61cc2, //mov v2.16b, v6.16b
Mike Klein5224f462017-03-07 17:29:54 -0500483 0x4e33ce21, //fmla v1.4s, v17.4s, v19.4s
484 0x4e30cc62, //fmla v2.4s, v3.4s, v16.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500485 0x4f03f603, //fmov v3.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500486 0xd61f0060, //br x3
487};
488
// x8 (taken from [x1]) points at four consecutive pointers: a pixel pointer
// followed by three table pointers. Loads four 32-bit pixels from
// pixel pointer + 4*x0, then for each lane:
//   v0 = 32-bit entry of the first table  indexed by bits 0-7 of the pixel
//   v1 = 32-bit entry of the second table indexed by bits 8-15
//   v2 = 32-bit entry of the third table  indexed by bits 16-23
//   v3 = float(bits 24-31) * constant with bit pattern 0x3b808081
//        (presumably 1/255)
// Table entries are gathered one lane at a time through general registers.
// Then branches to x3.
489CODE const uint32_t sk_load_tables_aarch64[] = {
490 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Klein894d5612017-03-07 07:59:52 -0500491 0xd37ef409, //lsl x9, x0, #2
Mike Klein5224f462017-03-07 17:29:54 -0500492 0x6f00e620, //movi v0.2d, #0xff000000ff
493 0x52a7700b, //mov w11, #0x3b800000
494 0xa940310a, //ldp x10, x12, [x8]
495 0x7290102b, //movk w11, #0x8081
496 0x4e040d63, //dup v3.4s, w11
Mike Klein894d5612017-03-07 07:59:52 -0500497 0x3ce96942, //ldr q2, [x10, x9]
498 0xa9412109, //ldp x9, x8, [x8, #16]
Mike Klein5224f462017-03-07 17:29:54 -0500499 0x4e201c41, //and v1.16b, v2.16b, v0.16b
500 0x1e26002e, //fmov w14, s1
501 0x6f380450, //ushr v16.4s, v2.4s, #8
502 0x6f300451, //ushr v17.4s, v2.4s, #16
503 0x8b2e498e, //add x14, x12, w14, uxtw #2
Mike Klein894d5612017-03-07 07:59:52 -0500504 0x0e0c3c2a, //mov w10, v1.s[1]
Mike Klein5224f462017-03-07 17:29:54 -0500505 0x0e143c2b, //mov w11, v1.s[2]
Mike Klein894d5612017-03-07 07:59:52 -0500506 0x0e1c3c2d, //mov w13, v1.s[3]
Mike Klein5224f462017-03-07 17:29:54 -0500507 0x4e201e01, //and v1.16b, v16.16b, v0.16b
508 0x4e201e30, //and v16.16b, v17.16b, v0.16b
509 0x0d4081c0, //ld1 {v0.s}[0], [x14]
510 0x8b2a498a, //add x10, x12, w10, uxtw #2
511 0xbc6b5991, //ldr s17, [x12, w11, uxtw #2]
512 0xbc6d5992, //ldr s18, [x12, w13, uxtw #2]
Mike Klein894d5612017-03-07 07:59:52 -0500513 0x0e0c3c2b, //mov w11, v1.s[1]
Mike Klein5224f462017-03-07 17:29:54 -0500514 0x0e143c2c, //mov w12, v1.s[2]
Mike Klein894d5612017-03-07 07:59:52 -0500515 0x0e1c3c2d, //mov w13, v1.s[3]
Mike Klein5224f462017-03-07 17:29:54 -0500516 0x1e26002e, //fmov w14, s1
517 0x8b2e492e, //add x14, x9, w14, uxtw #2
518 0xbc6c5933, //ldr s19, [x9, w12, uxtw #2]
519 0xbc6d5934, //ldr s20, [x9, w13, uxtw #2]
Mike Klein894d5612017-03-07 07:59:52 -0500520 0x8b2b4929, //add x9, x9, w11, uxtw #2
Mike Klein5224f462017-03-07 17:29:54 -0500521 0x1e26020b, //fmov w11, s16
Mike Klein894d5612017-03-07 07:59:52 -0500522 0x6f280442, //ushr v2.4s, v2.4s, #24
Mike Klein5224f462017-03-07 17:29:54 -0500523 0x0d409140, //ld1 {v0.s}[1], [x10]
Mike Klein894d5612017-03-07 07:59:52 -0500524 0x4e21d842, //scvtf v2.4s, v2.4s
Mike Klein5224f462017-03-07 17:29:54 -0500525 0x8b2b490a, //add x10, x8, w11, uxtw #2
526 0x0d4081c1, //ld1 {v1.s}[0], [x14]
527 0x6e23dc43, //fmul v3.4s, v2.4s, v3.4s
Mike Klein894d5612017-03-07 07:59:52 -0500528 0x0d408142, //ld1 {v2.s}[0], [x10]
Mike Klein5224f462017-03-07 17:29:54 -0500529 0x0e0c3e0f, //mov w15, v16.s[1]
530 0x0e143e0c, //mov w12, v16.s[2]
531 0x8b2f490a, //add x10, x8, w15, uxtw #2
532 0x0e1c3e0d, //mov w13, v16.s[3]
533 0xbc6c5910, //ldr s16, [x8, w12, uxtw #2]
Mike Klein894d5612017-03-07 07:59:52 -0500534 0x0d409121, //ld1 {v1.s}[1], [x9]
535 0x0d409142, //ld1 {v2.s}[1], [x10]
Mike Klein5224f462017-03-07 17:29:54 -0500536 0x6e140620, //mov v0.s[2], v17.s[0]
537 0xbc6d5911, //ldr s17, [x8, w13, uxtw #2]
538 0x6e140661, //mov v1.s[2], v19.s[0]
539 0x6e140602, //mov v2.s[2], v16.s[0]
540 0x6e1c0640, //mov v0.s[3], v18.s[0]
541 0x6e1c0681, //mov v1.s[3], v20.s[0]
542 0x6e1c0622, //mov v2.s[3], v17.s[0]
Mike Klein894d5612017-03-07 07:59:52 -0500543 0xd61f0060, //br x3
544};
545
// Reads four consecutive bytes at (*x8) + x0, where x8 comes from [x1],
// converts each to float and scales it by the constant with bit pattern
// 0x3b808081 (presumably 1/255) into v3. v0, v1 and v2 are zeroed.
// Then branches to x3.
546CODE const uint32_t sk_load_a8_aarch64[] = {
547 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Klein5224f462017-03-07 17:29:54 -0500548 0x52a77009, //mov w9, #0x3b800000
549 0x72901029, //movk w9, #0x8081
550 0x4e040d22, //dup v2.4s, w9
551 0xf9400108, //ldr x8, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500552 0x6f00e400, //movi v0.2d, #0x0
553 0x6f00e401, //movi v1.2d, #0x0
Mike Klein894d5612017-03-07 07:59:52 -0500554 0x8b000108, //add x8, x8, x0
Mike Klein5224f462017-03-07 17:29:54 -0500555 0x3940010a, //ldrb w10, [x8]
556 0x3940050b, //ldrb w11, [x8, #1]
557 0x3940090c, //ldrb w12, [x8, #2]
Mike Klein894d5612017-03-07 07:59:52 -0500558 0x39400d08, //ldrb w8, [x8, #3]
Mike Klein5224f462017-03-07 17:29:54 -0500559 0x4e021d43, //mov v3.h[0], w10
560 0x4e061d63, //mov v3.h[1], w11
561 0x4e0a1d83, //mov v3.h[2], w12
562 0x4e0e1d03, //mov v3.h[3], w8
Mike Klein5224f462017-03-07 17:29:54 -0500563 0x2f10a463, //uxtl v3.4s, v3.4h
564 0x6e21d863, //ucvtf v3.4s, v3.4s
565 0x6e22dc63, //fmul v3.4s, v3.4s, v2.4s
Mike Klein894d5612017-03-07 07:59:52 -0500566 0x6f00e402, //movi v2.2d, #0x0
567 0xd61f0060, //br x3
568};
569
// Multiplies v3 by 255.0 (0x437f0000), converts to unsigned integers with
// fcvtnu, narrows, and stores the low byte of each of the four lanes to
// consecutive bytes at (**x1) + x0. v0-v3 are not modified. It then loads the
// next code pointer from [x1, #8], advances x1 by 16 and branches to it.
570CODE const uint32_t sk_store_a8_aarch64[] = {
571 0xf9400028, //ldr x8, [x1]
Mike Klein5224f462017-03-07 17:29:54 -0500572 0x52a86fe9, //mov w9, #0x437f0000
573 0x4e040d30, //dup v16.4s, w9
574 0x6e30dc70, //fmul v16.4s, v3.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500575 0xf9400108, //ldr x8, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500576 0x6e21aa10, //fcvtnu v16.4s, v16.4s
577 0x0e612a10, //xtn v16.4h, v16.4s
578 0x0e0e3e09, //umov w9, v16.h[3]
579 0x8b000108, //add x8, x8, x0
580 0x39000d09, //strb w9, [x8, #3]
581 0x0e0a3e09, //umov w9, v16.h[2]
582 0x39000909, //strb w9, [x8, #2]
583 0x0e063e09, //umov w9, v16.h[1]
584 0x39000509, //strb w9, [x8, #1]
585 0x0e023e09, //umov w9, v16.h[0]
586 0x39000109, //strb w9, [x8]
587 0xf9400423, //ldr x3, [x1, #8]
588 0x91004021, //add x1, x1, #0x10
589 0xd61f0060, //br x3
590};
591
// Loads four 16-bit values p from (*x8) + 2*x0, where x8 comes from [x1], and
// unpacks them by masking in place (no shift) and scaling by float constants
// given here as bit patterns:
//   v0 = float(p & 0xf800) * 0x37842108
//   v1 = float(p & 0x07e0) * 0x3a020821
//   v2 = float(p & 0x001f) * 0x3d042108
//   v3 = 1.0
// Then branches to x3.
592CODE const uint32_t sk_load_565_aarch64[] = {
593 0xa8c10c28, //ldp x8, x3, [x1], #16
594 0xd37ff809, //lsl x9, x0, #1
Mike Klein5224f462017-03-07 17:29:54 -0500595 0x4f072701, //movi v1.4s, #0xf8, lsl #8
596 0x4f0007e3, //movi v3.4s, #0x1f
Mike Klein894d5612017-03-07 07:59:52 -0500597 0xf9400108, //ldr x8, [x8]
598 0xfc696900, //ldr d0, [x8, x9]
Mike Klein5224f462017-03-07 17:29:54 -0500599 0x321b17e8, //orr w8, wzr, #0x7e0
600 0x4e040d02, //dup v2.4s, w8
601 0x52a6f088, //mov w8, #0x37840000
602 0x72842108, //movk w8, #0x2108
Mike Klein894d5612017-03-07 07:59:52 -0500603 0x2f10a400, //uxtl v0.4s, v0.4h
Mike Klein5224f462017-03-07 17:29:54 -0500604 0x4e211c01, //and v1.16b, v0.16b, v1.16b
605 0x4e221c02, //and v2.16b, v0.16b, v2.16b
606 0x4e231c03, //and v3.16b, v0.16b, v3.16b
607 0x4e040d00, //dup v0.4s, w8
608 0x52a74048, //mov w8, #0x3a020000
609 0x72810428, //movk w8, #0x821
610 0x4e21d821, //scvtf v1.4s, v1.4s
611 0x6e20dc20, //fmul v0.4s, v1.4s, v0.4s
612 0x4e040d01, //dup v1.4s, w8
613 0x52a7a088, //mov w8, #0x3d040000
614 0x72842108, //movk w8, #0x2108
615 0x4e21d842, //scvtf v2.4s, v2.4s
616 0x6e21dc41, //fmul v1.4s, v2.4s, v1.4s
617 0x4e040d02, //dup v2.4s, w8
618 0x4e21d863, //scvtf v3.4s, v3.4s
619 0x6e22dc62, //fmul v2.4s, v3.4s, v2.4s
620 0x4f03f603, //fmov v3.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500621 0xd61f0060, //br x3
622};
623
// Scales v0 by 31.0, v1 by 63.0 (0x427c0000) and v2 by 31.0, converts each to
// unsigned integers with fcvtnu, and packs them per lane as
// (v0 << 11) | (v1 << 5) | v2. The result is narrowed to 16 bits and the four
// values are stored at (**x1) + 2*x0. v0-v3 are not modified. It then loads
// the next code pointer from [x1, #8], advances x1 by 16 and branches to it.
624CODE const uint32_t sk_store_565_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500625 0xf9400028, //ldr x8, [x1]
Mike Klein5224f462017-03-07 17:29:54 -0500626 0x52a84f8a, //mov w10, #0x427c0000
627 0x4f01f7f0, //fmov v16.4s, #3.100000000000000000e+01
628 0x4e040d52, //dup v18.4s, w10
629 0x6e30dc11, //fmul v17.4s, v0.4s, v16.4s
630 0x6e32dc32, //fmul v18.4s, v1.4s, v18.4s
Mike Klein894d5612017-03-07 07:59:52 -0500631 0x6e21aa31, //fcvtnu v17.4s, v17.4s
Mike Klein5224f462017-03-07 17:29:54 -0500632 0x6e21aa52, //fcvtnu v18.4s, v18.4s
633 0x6e30dc50, //fmul v16.4s, v2.4s, v16.4s
634 0x4f2b5631, //shl v17.4s, v17.4s, #11
Mike Klein894d5612017-03-07 07:59:52 -0500635 0xf9400108, //ldr x8, [x8]
Mike Klein5224f462017-03-07 17:29:54 -0500636 0x4f255652, //shl v18.4s, v18.4s, #5
637 0x4eb11e51, //orr v17.16b, v18.16b, v17.16b
Mike Klein894d5612017-03-07 07:59:52 -0500638 0x6e21aa10, //fcvtnu v16.4s, v16.4s
639 0x4eb01e30, //orr v16.16b, v17.16b, v16.16b
Mike Klein5224f462017-03-07 17:29:54 -0500640 0xd37ff809, //lsl x9, x0, #1
Mike Klein894d5612017-03-07 07:59:52 -0500641 0x0e612a10, //xtn v16.4h, v16.4s
642 0xfc296910, //str d16, [x8, x9]
643 0xf9400423, //ldr x3, [x1, #8]
644 0x91004021, //add x1, x1, #0x10
645 0xd61f0060, //br x3
646};
647
// Loads four 32-bit values p from (*x8) + 4*x0, where x8 comes from [x1], and
// unpacks one byte per output register, each converted to float and scaled by
// the constant with bit pattern 0x3b808081 (presumably 1/255):
//   v0 = bits 0-7, v1 = bits 8-15, v2 = bits 16-23, v3 = bits 24-31.
// Then branches to x3.
648CODE const uint32_t sk_load_8888_aarch64[] = {
649 0xa8c10c28, //ldp x8, x3, [x1], #16
650 0xd37ef409, //lsl x9, x0, #2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500651 0x6f00e621, //movi v1.2d, #0xff000000ff
Mike Klein894d5612017-03-07 07:59:52 -0500652 0xf9400108, //ldr x8, [x8]
653 0x3ce96900, //ldr q0, [x8, x9]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500654 0x52a77008, //mov w8, #0x3b800000
655 0x72901028, //movk w8, #0x8081
656 0x4e040d02, //dup v2.4s, w8
Mike Klein894d5612017-03-07 07:59:52 -0500657 0x6f380410, //ushr v16.4s, v0.4s, #8
658 0x6f300411, //ushr v17.4s, v0.4s, #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500659 0x4e211c03, //and v3.16b, v0.16b, v1.16b
Mike Klein894d5612017-03-07 07:59:52 -0500660 0x6f280400, //ushr v0.4s, v0.4s, #24
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500661 0x4e211e10, //and v16.16b, v16.16b, v1.16b
662 0x4e211e21, //and v1.16b, v17.16b, v1.16b
Mike Klein894d5612017-03-07 07:59:52 -0500663 0x4e21d863, //scvtf v3.4s, v3.4s
664 0x4e21d811, //scvtf v17.4s, v0.4s
665 0x4e21da10, //scvtf v16.4s, v16.4s
666 0x4e21d832, //scvtf v18.4s, v1.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500667 0x6e22dc60, //fmul v0.4s, v3.4s, v2.4s
668 0x6e22de23, //fmul v3.4s, v17.4s, v2.4s
669 0x6e22de01, //fmul v1.4s, v16.4s, v2.4s
670 0x6e22de42, //fmul v2.4s, v18.4s, v2.4s
Mike Klein894d5612017-03-07 07:59:52 -0500671 0xd61f0060, //br x3
672};
673
// Scales v0-v3 by 255.0 (0x437f0000), converts each to unsigned integers with
// fcvtnu, and packs them per lane as v0 | (v1 << 8) | (v2 << 16) | (v3 << 24).
// The four 32-bit results are stored at (**x1) + 4*x0. v0-v3 are not
// modified. It then loads the next code pointer from [x1, #8], advances x1 by
// 16 and branches to it.
674CODE const uint32_t sk_store_8888_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500675 0x52a86fea, //mov w10, #0x437f0000
676 0x4e040d50, //dup v16.4s, w10
Mike Klein894d5612017-03-07 07:59:52 -0500677 0xf9400028, //ldr x8, [x1]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500678 0x6e30dc32, //fmul v18.4s, v1.4s, v16.4s
679 0x6e30dc11, //fmul v17.4s, v0.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500680 0x6e21aa52, //fcvtnu v18.4s, v18.4s
681 0x6e21aa31, //fcvtnu v17.4s, v17.4s
682 0x4f285652, //shl v18.4s, v18.4s, #8
683 0x4eb11e51, //orr v17.16b, v18.16b, v17.16b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500684 0x6e30dc52, //fmul v18.4s, v2.4s, v16.4s
685 0x6e30dc70, //fmul v16.4s, v3.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500686 0x6e21aa52, //fcvtnu v18.4s, v18.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500687 0xf9400108, //ldr x8, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500688 0x6e21aa10, //fcvtnu v16.4s, v16.4s
689 0x4f305652, //shl v18.4s, v18.4s, #16
690 0x4eb21e31, //orr v17.16b, v17.16b, v18.16b
691 0x4f385610, //shl v16.4s, v16.4s, #24
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500692 0xd37ef409, //lsl x9, x0, #2
Mike Klein894d5612017-03-07 07:59:52 -0500693 0x4eb01e30, //orr v16.16b, v17.16b, v16.16b
694 0x3ca96910, //str q16, [x8, x9]
695 0xf9400423, //ldr x3, [x1, #8]
696 0x91004021, //add x1, x1, #0x10
697 0xd61f0060, //br x3
698};
699
700CODE const uint32_t sk_load_f16_aarch64[] = {
701 0xa8c10c28, //ldp x8, x3, [x1], #16
702 0xf9400108, //ldr x8, [x8]
703 0x8b000d08, //add x8, x8, x0, lsl #3
704 0x0c400510, //ld4 {v16.4h-v19.4h}, [x8]
705 0x0e217a00, //fcvtl v0.4s, v16.4h
706 0x0e217a21, //fcvtl v1.4s, v17.4h
707 0x0e217a42, //fcvtl v2.4s, v18.4h
708 0x0e217a63, //fcvtl v3.4s, v19.4h
709 0xd61f0060, //br x3
710};
711
712CODE const uint32_t sk_store_f16_aarch64[] = {
713 0xf9400028, //ldr x8, [x1]
714 0x0e216810, //fcvtn v16.4h, v0.4s
715 0x0e216831, //fcvtn v17.4h, v1.4s
716 0x0e216852, //fcvtn v18.4h, v2.4s
717 0xf9400108, //ldr x8, [x8]
718 0x0e216873, //fcvtn v19.4h, v3.4s
719 0x8b000d08, //add x8, x8, x0, lsl #3
720 0x0c000510, //st4 {v16.4h-v19.4h}, [x8]
721 0xf9400423, //ldr x3, [x1, #8]
722 0x91004021, //add x1, x1, #0x10
723 0xd61f0060, //br x3
724};
725
726CODE const uint32_t sk_store_f32_aarch64[] = {
727 0xf9400028, //ldr x8, [x1]
728 0xf9400108, //ldr x8, [x8]
729 0x8b001108, //add x8, x8, x0, lsl #4
730 0x4c000900, //st4 {v0.4s-v3.4s}, [x8]
731 0xf9400423, //ldr x3, [x1, #8]
732 0x91004021, //add x1, x1, #0x10
733 0xd61f0060, //br x3
734};
735
736CODE const uint32_t sk_clamp_x_aarch64[] = {
737 0xa8c10c28, //ldp x8, x3, [x1], #16
738 0x6f00e411, //movi v17.2d, #0x0
739 0x4e20f620, //fmax v0.4s, v17.4s, v0.4s
740 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
741 0x4d40c910, //ld1r {v16.4s}, [x8]
742 0x4eb18610, //add v16.4s, v16.4s, v17.4s
743 0x4eb0f400, //fmin v0.4s, v0.4s, v16.4s
744 0xd61f0060, //br x3
745};
746
747CODE const uint32_t sk_clamp_y_aarch64[] = {
748 0xa8c10c28, //ldp x8, x3, [x1], #16
749 0x6f00e411, //movi v17.2d, #0x0
750 0x4e21f621, //fmax v1.4s, v17.4s, v1.4s
751 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
752 0x4d40c910, //ld1r {v16.4s}, [x8]
753 0x4eb18610, //add v16.4s, v16.4s, v17.4s
754 0x4eb0f421, //fmin v1.4s, v1.4s, v16.4s
755 0xd61f0060, //br x3
756};
757
758CODE const uint32_t sk_repeat_x_aarch64[] = {
759 0xa8c10c28, //ldp x8, x3, [x1], #16
760 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
761 0xbd400110, //ldr s16, [x8]
762 0x4e040612, //dup v18.4s, v16.s[0]
763 0x4eb18651, //add v17.4s, v18.4s, v17.4s
764 0x6e32fc12, //fdiv v18.4s, v0.4s, v18.4s
765 0x4e219a52, //frintm v18.4s, v18.4s
766 0x4f905240, //fmls v0.4s, v18.4s, v16.s[0]
767 0x4eb1f400, //fmin v0.4s, v0.4s, v17.4s
768 0xd61f0060, //br x3
769};
770
771CODE const uint32_t sk_repeat_y_aarch64[] = {
772 0xa8c10c28, //ldp x8, x3, [x1], #16
773 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
774 0xbd400110, //ldr s16, [x8]
775 0x4e040612, //dup v18.4s, v16.s[0]
776 0x4eb18651, //add v17.4s, v18.4s, v17.4s
777 0x6e32fc32, //fdiv v18.4s, v1.4s, v18.4s
778 0x4e219a52, //frintm v18.4s, v18.4s
779 0x4f905241, //fmls v1.4s, v18.4s, v16.s[0]
780 0x4eb1f421, //fmin v1.4s, v1.4s, v17.4s
781 0xd61f0060, //br x3
782};
783
784CODE const uint32_t sk_mirror_x_aarch64[] = {
785 0xa8c10c28, //ldp x8, x3, [x1], #16
786 0xbd400110, //ldr s16, [x8]
787 0x4e040611, //dup v17.4s, v16.s[0]
788 0x1e302a10, //fadd s16, s16, s16
789 0x4eb1d400, //fsub v0.4s, v0.4s, v17.4s
790 0x4e040612, //dup v18.4s, v16.s[0]
791 0x6e32fc12, //fdiv v18.4s, v0.4s, v18.4s
792 0x4e219a52, //frintm v18.4s, v18.4s
793 0x4f905240, //fmls v0.4s, v18.4s, v16.s[0]
794 0x6f07e7f0, //movi v16.2d, #0xffffffffffffffff
795 0x4eb1d400, //fsub v0.4s, v0.4s, v17.4s
796 0x4eb08630, //add v16.4s, v17.4s, v16.4s
797 0x4ea0f800, //fabs v0.4s, v0.4s
798 0x4eb0f400, //fmin v0.4s, v0.4s, v16.4s
799 0xd61f0060, //br x3
800};
801
802CODE const uint32_t sk_mirror_y_aarch64[] = {
803 0xa8c10c28, //ldp x8, x3, [x1], #16
804 0xbd400110, //ldr s16, [x8]
805 0x4e040611, //dup v17.4s, v16.s[0]
806 0x1e302a10, //fadd s16, s16, s16
807 0x4eb1d421, //fsub v1.4s, v1.4s, v17.4s
808 0x4e040612, //dup v18.4s, v16.s[0]
809 0x6e32fc32, //fdiv v18.4s, v1.4s, v18.4s
810 0x4e219a52, //frintm v18.4s, v18.4s
811 0x4f905241, //fmls v1.4s, v18.4s, v16.s[0]
812 0x6f07e7f0, //movi v16.2d, #0xffffffffffffffff
813 0x4eb1d421, //fsub v1.4s, v1.4s, v17.4s
814 0x4eb08630, //add v16.4s, v17.4s, v16.4s
815 0x4ea0f821, //fabs v1.4s, v1.4s
816 0x4eb0f421, //fmin v1.4s, v1.4s, v16.4s
817 0xd61f0060, //br x3
818};
819
Mike Kleine9ed07d2017-03-07 12:28:11 -0500820CODE const uint32_t sk_luminance_to_alpha_aarch64[] = {
Mike Klein5224f462017-03-07 17:29:54 -0500821 0x52a7cb28, //mov w8, #0x3e590000
822 0x72967a08, //movk w8, #0xb3d0
823 0x4e040d11, //dup v17.4s, w8
824 0x52a7e6e8, //mov w8, #0x3f370000
825 0x7282eb28, //movk w8, #0x1759
826 0x4ea01c10, //mov v16.16b, v0.16b
827 0x4e040d00, //dup v0.4s, w8
828 0x52a7b268, //mov w8, #0x3d930000
Mike Kleine9ed07d2017-03-07 12:28:11 -0500829 0xf8408423, //ldr x3, [x1], #8
Mike Klein5224f462017-03-07 17:29:54 -0500830 0x729bb308, //movk w8, #0xdd98
831 0x6e20dc23, //fmul v3.4s, v1.4s, v0.4s
832 0x4e30ce23, //fmla v3.4s, v17.4s, v16.4s
833 0x4e040d10, //dup v16.4s, w8
Mike Kleine9ed07d2017-03-07 12:28:11 -0500834 0x6f00e400, //movi v0.2d, #0x0
835 0x6f00e401, //movi v1.2d, #0x0
Mike Klein5224f462017-03-07 17:29:54 -0500836 0x4e22ce03, //fmla v3.4s, v16.4s, v2.4s
Mike Kleine9ed07d2017-03-07 12:28:11 -0500837 0x6f00e402, //movi v2.2d, #0x0
838 0xd61f0060, //br x3
839};
840
Mike Klein894d5612017-03-07 07:59:52 -0500841CODE const uint32_t sk_matrix_2x3_aarch64[] = {
842 0xa8c10c28, //ldp x8, x3, [x1], #16
843 0xaa0803e9, //mov x9, x8
844 0x9100410a, //add x10, x8, #0x10
845 0x4ddfc932, //ld1r {v18.4s}, [x9], #4
846 0x4d40c950, //ld1r {v16.4s}, [x10]
847 0x2d415113, //ldp s19, s20, [x8, #8]
848 0x9100510a, //add x10, x8, #0x14
849 0x4d40c951, //ld1r {v17.4s}, [x10]
850 0x4f931030, //fmla v16.4s, v1.4s, v19.s[0]
851 0xbd400133, //ldr s19, [x9]
852 0x4f941031, //fmla v17.4s, v1.4s, v20.s[0]
853 0x4e20ce50, //fmla v16.4s, v18.4s, v0.4s
854 0x4f931011, //fmla v17.4s, v0.4s, v19.s[0]
855 0x4eb01e00, //mov v0.16b, v16.16b
856 0x4eb11e21, //mov v1.16b, v17.16b
857 0xd61f0060, //br x3
858};
859
860CODE const uint32_t sk_matrix_3x4_aarch64[] = {
861 0xa8c10c28, //ldp x8, x3, [x1], #16
862 0xaa0803e9, //mov x9, x8
863 0x9100910a, //add x10, x8, #0x24
864 0x4ddfc933, //ld1r {v19.4s}, [x9], #4
865 0x4d40c950, //ld1r {v16.4s}, [x10]
866 0x9100a10a, //add x10, x8, #0x28
867 0x4d40c951, //ld1r {v17.4s}, [x10]
868 0x9100b10a, //add x10, x8, #0x2c
869 0x2d435514, //ldp s20, s21, [x8, #24]
870 0xbd402116, //ldr s22, [x8, #32]
871 0x4d40c952, //ld1r {v18.4s}, [x10]
872 0x4f941050, //fmla v16.4s, v2.4s, v20.s[0]
873 0x4f951051, //fmla v17.4s, v2.4s, v21.s[0]
874 0x4f961052, //fmla v18.4s, v2.4s, v22.s[0]
875 0x2d425502, //ldp s2, s21, [x8, #16]
876 0x2d415d14, //ldp s20, s23, [x8, #8]
877 0x4f821031, //fmla v17.4s, v1.4s, v2.s[0]
878 0xbd400122, //ldr s2, [x9]
879 0x4f971030, //fmla v16.4s, v1.4s, v23.s[0]
880 0x4f951032, //fmla v18.4s, v1.4s, v21.s[0]
881 0x4e20ce70, //fmla v16.4s, v19.4s, v0.4s
882 0x4f941012, //fmla v18.4s, v0.4s, v20.s[0]
883 0x4f821011, //fmla v17.4s, v0.4s, v2.s[0]
884 0x4eb01e00, //mov v0.16b, v16.16b
885 0x4eb11e21, //mov v1.16b, v17.16b
886 0x4eb21e42, //mov v2.16b, v18.16b
887 0xd61f0060, //br x3
888};
889
Mike Kleine9ed07d2017-03-07 12:28:11 -0500890CODE const uint32_t sk_matrix_4x5_aarch64[] = {
891 0xf9400029, //ldr x9, [x1]
892 0xaa0903e8, //mov x8, x9
893 0x9101012a, //add x10, x9, #0x40
894 0x4ddfc914, //ld1r {v20.4s}, [x8], #4
895 0x4d40c950, //ld1r {v16.4s}, [x10]
896 0x9101112a, //add x10, x9, #0x44
897 0x4d40c951, //ld1r {v17.4s}, [x10]
898 0x9101212a, //add x10, x9, #0x48
899 0x4d40c952, //ld1r {v18.4s}, [x10]
900 0x2d465533, //ldp s19, s21, [x9, #48]
901 0x2d475d36, //ldp s22, s23, [x9, #56]
902 0x9101312a, //add x10, x9, #0x4c
903 0xf9400423, //ldr x3, [x1, #8]
904 0x4f931070, //fmla v16.4s, v3.4s, v19.s[0]
905 0x4d40c953, //ld1r {v19.4s}, [x10]
906 0x4f951071, //fmla v17.4s, v3.4s, v21.s[0]
907 0x4f961072, //fmla v18.4s, v3.4s, v22.s[0]
908 0x2d445935, //ldp s21, s22, [x9, #32]
909 0x4f971073, //fmla v19.4s, v3.4s, v23.s[0]
910 0x2d455d23, //ldp s3, s23, [x9, #40]
911 0x91004021, //add x1, x1, #0x10
912 0x4f951050, //fmla v16.4s, v2.4s, v21.s[0]
913 0x4f961051, //fmla v17.4s, v2.4s, v22.s[0]
914 0x2d425935, //ldp s21, s22, [x9, #16]
915 0x4f971053, //fmla v19.4s, v2.4s, v23.s[0]
916 0x4f831052, //fmla v18.4s, v2.4s, v3.s[0]
917 0x2d410d22, //ldp s2, s3, [x9, #8]
918 0x4f951030, //fmla v16.4s, v1.4s, v21.s[0]
919 0x2d435d35, //ldp s21, s23, [x9, #24]
920 0x4f961031, //fmla v17.4s, v1.4s, v22.s[0]
921 0xbd400116, //ldr s22, [x8]
922 0x4e20ce90, //fmla v16.4s, v20.4s, v0.4s
923 0x4f951032, //fmla v18.4s, v1.4s, v21.s[0]
924 0x4f971033, //fmla v19.4s, v1.4s, v23.s[0]
925 0x4f821012, //fmla v18.4s, v0.4s, v2.s[0]
926 0x4f831013, //fmla v19.4s, v0.4s, v3.s[0]
927 0x4f961011, //fmla v17.4s, v0.4s, v22.s[0]
928 0x4eb01e00, //mov v0.16b, v16.16b
929 0x4eb11e21, //mov v1.16b, v17.16b
930 0x4eb21e42, //mov v2.16b, v18.16b
931 0x4eb31e63, //mov v3.16b, v19.16b
932 0xd61f0060, //br x3
933};
934
Mike Klein894d5612017-03-07 07:59:52 -0500935CODE const uint32_t sk_matrix_perspective_aarch64[] = {
936 0xa8c10c28, //ldp x8, x3, [x1], #16
937 0xaa0803e9, //mov x9, x8
938 0x9100510a, //add x10, x8, #0x14
939 0x4ddfc930, //ld1r {v16.4s}, [x9], #4
940 0x4d40c951, //ld1r {v17.4s}, [x10]
941 0x9100810a, //add x10, x8, #0x20
942 0x4d40c952, //ld1r {v18.4s}, [x10]
943 0x2d41d113, //ldp s19, s20, [x8, #12]
944 0x2d435915, //ldp s21, s22, [x8, #24]
945 0x91002108, //add x8, x8, #0x8
946 0x4f941031, //fmla v17.4s, v1.4s, v20.s[0]
947 0x4d40c914, //ld1r {v20.4s}, [x8]
948 0x4f961032, //fmla v18.4s, v1.4s, v22.s[0]
949 0xbd400136, //ldr s22, [x9]
950 0x4f951012, //fmla v18.4s, v0.4s, v21.s[0]
951 0x4f931011, //fmla v17.4s, v0.4s, v19.s[0]
952 0x4f961034, //fmla v20.4s, v1.4s, v22.s[0]
953 0x4ea1da41, //frecpe v1.4s, v18.4s
954 0x4e21fe52, //frecps v18.4s, v18.4s, v1.4s
955 0x6e32dc32, //fmul v18.4s, v1.4s, v18.4s
956 0x4e20ce14, //fmla v20.4s, v16.4s, v0.4s
957 0x6e32de21, //fmul v1.4s, v17.4s, v18.4s
958 0x6e32de80, //fmul v0.4s, v20.4s, v18.4s
959 0xd61f0060, //br x3
960};
961
962CODE const uint32_t sk_linear_gradient_2stops_aarch64[] = {
963 0xa8c10c28, //ldp x8, x3, [x1], #16
964 0xad404503, //ldp q3, q17, [x8]
965 0x4e040470, //dup v16.4s, v3.s[0]
966 0x4e0c0461, //dup v1.4s, v3.s[1]
967 0x4e140462, //dup v2.4s, v3.s[2]
968 0x4e1c0463, //dup v3.4s, v3.s[3]
969 0x4f911010, //fmla v16.4s, v0.4s, v17.s[0]
970 0x4fb11001, //fmla v1.4s, v0.4s, v17.s[1]
971 0x4f911802, //fmla v2.4s, v0.4s, v17.s[2]
972 0x4fb11803, //fmla v3.4s, v0.4s, v17.s[3]
973 0x4eb01e00, //mov v0.16b, v16.16b
974 0xd61f0060, //br x3
975};
976#elif defined(__arm__)
977
978CODE const uint32_t sk_start_pipeline_vfp4[] = {
979 0xe92d41f0, //push {r4, r5, r6, r7, r8, lr}
Mike Klein894d5612017-03-07 07:59:52 -0500980 0xe1a04000, //mov r4, r0
Mike Klein64b97482017-03-14 17:35:04 -0700981 0xe2840002, //add r0, r4, #2
Mike Klein894d5612017-03-07 07:59:52 -0500982 0xe1a05003, //mov r5, r3
983 0xe1a08002, //mov r8, r2
Mike Klein64b97482017-03-14 17:35:04 -0700984 0xe1a07001, //mov r7, r1
985 0xe1500005, //cmp r0, r5
986 0x8a000010, //bhi 64 <sk_start_pipeline_vfp4+0x64>
Mike Klein894d5612017-03-07 07:59:52 -0500987 0xe4976004, //ldr r6, [r7], #4
Mike Klein894d5612017-03-07 07:59:52 -0500988 0xf2800010, //vmov.i32 d0, #0
989 0xe1a00004, //mov r0, r4
990 0xf2801010, //vmov.i32 d1, #0
991 0xe1a01007, //mov r1, r7
992 0xf2802010, //vmov.i32 d2, #0
993 0xe1a02008, //mov r2, r8
994 0xf2803010, //vmov.i32 d3, #0
995 0xf2804010, //vmov.i32 d4, #0
996 0xf2805010, //vmov.i32 d5, #0
997 0xf2806010, //vmov.i32 d6, #0
998 0xf2807010, //vmov.i32 d7, #0
999 0xe12fff36, //blx r6
1000 0xe2840004, //add r0, r4, #4
1001 0xe2844002, //add r4, r4, #2
1002 0xe1500005, //cmp r0, r5
Mike Klein64b97482017-03-14 17:35:04 -07001003 0x9affffef, //bls 24 <sk_start_pipeline_vfp4+0x24>
Mike Klein894d5612017-03-07 07:59:52 -05001004 0xe1a00004, //mov r0, r4
1005 0xe8bd81f0, //pop {r4, r5, r6, r7, r8, pc}
1006};
1007
1008CODE const uint32_t sk_just_return_vfp4[] = {
1009 0xe12fff1e, //bx lr
1010};
1011
1012CODE const uint32_t sk_seed_shader_vfp4[] = {
Mike Klein894d5612017-03-07 07:59:52 -05001013 0xee800b90, //vdup.32 d16, r0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001014 0xe8911008, //ldm r1, {r3, ip}
Mike Klein894d5612017-03-07 07:59:52 -05001015 0xf3fb0620, //vcvt.f32.s32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001016 0xf2c3161f, //vmov.i32 d17, #1056964608
Mike Klein5224f462017-03-07 17:29:54 -05001017 0xedd23b00, //vldr d19, [r2]
Mike Klein894d5612017-03-07 07:59:52 -05001018 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001019 0xf2872f10, //vmov.f32 d2, #1
1020 0xf3fb2622, //vcvt.f32.s32 d18, d18
1021 0xe2811008, //add r1, r1, #8
1022 0xf2400da1, //vadd.f32 d16, d16, d17
1023 0xf2803010, //vmov.i32 d3, #0
Mike Klein894d5612017-03-07 07:59:52 -05001024 0xf2804010, //vmov.i32 d4, #0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001025 0xf2021da1, //vadd.f32 d1, d18, d17
1026 0xf2000da3, //vadd.f32 d0, d16, d19
Mike Klein894d5612017-03-07 07:59:52 -05001027 0xf2805010, //vmov.i32 d5, #0
Mike Klein894d5612017-03-07 07:59:52 -05001028 0xf2806010, //vmov.i32 d6, #0
Mike Klein894d5612017-03-07 07:59:52 -05001029 0xf2807010, //vmov.i32 d7, #0
1030 0xe12fff1c, //bx ip
1031};
1032
1033CODE const uint32_t sk_constant_color_vfp4[] = {
1034 0xe8911008, //ldm r1, {r3, ip}
1035 0xe2811008, //add r1, r1, #8
1036 0xf4630a0f, //vld1.8 {d16-d17}, [r3]
1037 0xf3b40c20, //vdup.32 d0, d16[0]
1038 0xf3bc1c20, //vdup.32 d1, d16[1]
1039 0xf3b42c21, //vdup.32 d2, d17[0]
1040 0xf3bc3c21, //vdup.32 d3, d17[1]
1041 0xe12fff1c, //bx ip
1042};
1043
1044CODE const uint32_t sk_clear_vfp4[] = {
1045 0xe4913004, //ldr r3, [r1], #4
1046 0xf2800010, //vmov.i32 d0, #0
1047 0xf2801010, //vmov.i32 d1, #0
1048 0xf2802010, //vmov.i32 d2, #0
1049 0xf2803010, //vmov.i32 d3, #0
1050 0xe12fff13, //bx r3
1051};
1052
1053CODE const uint32_t sk_plus__vfp4[] = {
1054 0xf2000d04, //vadd.f32 d0, d0, d4
1055 0xe4913004, //ldr r3, [r1], #4
1056 0xf2011d05, //vadd.f32 d1, d1, d5
1057 0xf2022d06, //vadd.f32 d2, d2, d6
1058 0xf2033d07, //vadd.f32 d3, d3, d7
1059 0xe12fff13, //bx r3
1060};
1061
1062CODE const uint32_t sk_srcover_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001063 0xf2c70f10, //vmov.f32 d16, #1
Mike Klein894d5612017-03-07 07:59:52 -05001064 0xe4913004, //ldr r3, [r1], #4
1065 0xf2600d83, //vsub.f32 d16, d16, d3
1066 0xf2040c30, //vfma.f32 d0, d4, d16
1067 0xf2051c30, //vfma.f32 d1, d5, d16
1068 0xf2062c30, //vfma.f32 d2, d6, d16
1069 0xf2073c30, //vfma.f32 d3, d7, d16
1070 0xe12fff13, //bx r3
1071};
1072
1073CODE const uint32_t sk_dstover_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001074 0xf2c70f10, //vmov.f32 d16, #1
1075 0xe4913004, //ldr r3, [r1], #4
Mike Klein894d5612017-03-07 07:59:52 -05001076 0xf2651115, //vorr d17, d5, d5
1077 0xf2604d87, //vsub.f32 d20, d16, d7
1078 0xf2640114, //vorr d16, d4, d4
1079 0xf2662116, //vorr d18, d6, d6
Mike Klein894d5612017-03-07 07:59:52 -05001080 0xf2673117, //vorr d19, d7, d7
1081 0xf2400c34, //vfma.f32 d16, d0, d20
1082 0xf2411c34, //vfma.f32 d17, d1, d20
1083 0xf2422c34, //vfma.f32 d18, d2, d20
1084 0xf2433c34, //vfma.f32 d19, d3, d20
1085 0xf22001b0, //vorr d0, d16, d16
1086 0xf22111b1, //vorr d1, d17, d17
1087 0xf22221b2, //vorr d2, d18, d18
1088 0xf22331b3, //vorr d3, d19, d19
1089 0xe12fff13, //bx r3
1090};
1091
1092CODE const uint32_t sk_clamp_0_vfp4[] = {
1093 0xf2c00010, //vmov.i32 d16, #0
1094 0xe4913004, //ldr r3, [r1], #4
1095 0xf2000f20, //vmax.f32 d0, d0, d16
1096 0xf2011f20, //vmax.f32 d1, d1, d16
1097 0xf2022f20, //vmax.f32 d2, d2, d16
1098 0xf2033f20, //vmax.f32 d3, d3, d16
1099 0xe12fff13, //bx r3
1100};
1101
1102CODE const uint32_t sk_clamp_1_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001103 0xf2c70f10, //vmov.f32 d16, #1
Mike Klein894d5612017-03-07 07:59:52 -05001104 0xe4913004, //ldr r3, [r1], #4
1105 0xf2200f20, //vmin.f32 d0, d0, d16
1106 0xf2211f20, //vmin.f32 d1, d1, d16
1107 0xf2222f20, //vmin.f32 d2, d2, d16
1108 0xf2233f20, //vmin.f32 d3, d3, d16
1109 0xe12fff13, //bx r3
1110};
1111
1112CODE const uint32_t sk_clamp_a_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001113 0xf2c70f10, //vmov.f32 d16, #1
Mike Klein894d5612017-03-07 07:59:52 -05001114 0xe4913004, //ldr r3, [r1], #4
1115 0xf2233f20, //vmin.f32 d3, d3, d16
1116 0xf2200f03, //vmin.f32 d0, d0, d3
1117 0xf2211f03, //vmin.f32 d1, d1, d3
1118 0xf2222f03, //vmin.f32 d2, d2, d3
1119 0xe12fff13, //bx r3
1120};
1121
1122CODE const uint32_t sk_set_rgb_vfp4[] = {
1123 0xe92d4800, //push {fp, lr}
Mike Klein64b97482017-03-14 17:35:04 -07001124 0xe8911008, //ldm r1, {r3, ip}
Mike Klein894d5612017-03-07 07:59:52 -05001125 0xe2811008, //add r1, r1, #8
Mike Klein64b97482017-03-14 17:35:04 -07001126 0xe283e008, //add lr, r3, #8
1127 0xf4a30c9d, //vld1.32 {d0[]}, [r3 :32]!
1128 0xf4ae2c9f, //vld1.32 {d2[]}, [lr :32]
Mike Klein894d5612017-03-07 07:59:52 -05001129 0xf4a31c9f, //vld1.32 {d1[]}, [r3 :32]
1130 0xe8bd4800, //pop {fp, lr}
1131 0xe12fff1c, //bx ip
1132};
1133
1134CODE const uint32_t sk_swap_rb_vfp4[] = {
1135 0xeef00b40, //vmov.f64 d16, d0
1136 0xe4913004, //ldr r3, [r1], #4
1137 0xeeb00b42, //vmov.f64 d0, d2
1138 0xeeb02b60, //vmov.f64 d2, d16
1139 0xe12fff13, //bx r3
1140};
1141
1142CODE const uint32_t sk_swap_vfp4[] = {
1143 0xeef00b43, //vmov.f64 d16, d3
1144 0xe4913004, //ldr r3, [r1], #4
1145 0xeef01b42, //vmov.f64 d17, d2
1146 0xeef02b41, //vmov.f64 d18, d1
1147 0xeef03b40, //vmov.f64 d19, d0
1148 0xeeb00b44, //vmov.f64 d0, d4
1149 0xeeb01b45, //vmov.f64 d1, d5
1150 0xeeb02b46, //vmov.f64 d2, d6
1151 0xeeb03b47, //vmov.f64 d3, d7
1152 0xeeb04b63, //vmov.f64 d4, d19
1153 0xeeb05b62, //vmov.f64 d5, d18
1154 0xeeb06b61, //vmov.f64 d6, d17
1155 0xeeb07b60, //vmov.f64 d7, d16
1156 0xe12fff13, //bx r3
1157};
1158
1159CODE const uint32_t sk_move_src_dst_vfp4[] = {
1160 0xeeb04b40, //vmov.f64 d4, d0
1161 0xe4913004, //ldr r3, [r1], #4
1162 0xeeb05b41, //vmov.f64 d5, d1
1163 0xeeb06b42, //vmov.f64 d6, d2
1164 0xeeb07b43, //vmov.f64 d7, d3
1165 0xe12fff13, //bx r3
1166};
1167
1168CODE const uint32_t sk_move_dst_src_vfp4[] = {
1169 0xeeb00b44, //vmov.f64 d0, d4
1170 0xe4913004, //ldr r3, [r1], #4
1171 0xeeb01b45, //vmov.f64 d1, d5
1172 0xeeb02b46, //vmov.f64 d2, d6
1173 0xeeb03b47, //vmov.f64 d3, d7
1174 0xe12fff13, //bx r3
1175};
1176
1177CODE const uint32_t sk_premul_vfp4[] = {
1178 0xf3000d13, //vmul.f32 d0, d0, d3
1179 0xe4913004, //ldr r3, [r1], #4
1180 0xf3011d13, //vmul.f32 d1, d1, d3
1181 0xf3022d13, //vmul.f32 d2, d2, d3
1182 0xe12fff13, //bx r3
1183};
1184
1185CODE const uint32_t sk_unpremul_vfp4[] = {
1186 0xed2d8b04, //vpush {d8-d9}
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001187 0xeeb78a00, //vmov.f32 s16, #112
Mike Klein894d5612017-03-07 07:59:52 -05001188 0xf3f91503, //vceq.f32 d17, d3, #0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001189 0xf2c00010, //vmov.i32 d16, #0
Mike Klein894d5612017-03-07 07:59:52 -05001190 0xe4913004, //ldr r3, [r1], #4
1191 0xeec89a23, //vdiv.f32 s19, s16, s7
1192 0xee889a03, //vdiv.f32 s18, s16, s6
1193 0xf3501199, //vbsl d17, d16, d9
1194 0xf3010d90, //vmul.f32 d0, d17, d0
1195 0xf3011d91, //vmul.f32 d1, d17, d1
1196 0xf3012d92, //vmul.f32 d2, d17, d2
1197 0xecbd8b04, //vpop {d8-d9}
1198 0xe12fff13, //bx r3
Mike Klein64b97482017-03-14 17:35:04 -07001199 0xe320f000, //nop {0}
Mike Klein894d5612017-03-07 07:59:52 -05001200};
1201
1202CODE const uint32_t sk_from_srgb_vfp4[] = {
Mike Klein5224f462017-03-07 17:29:54 -05001203 0xeddf3b20, //vldr d19, [pc, #128]
1204 0xf3408d10, //vmul.f32 d24, d0, d0
1205 0xeddf0b1c, //vldr d16, [pc, #112]
1206 0xf26341b3, //vorr d20, d19, d19
1207 0xf26351b3, //vorr d21, d19, d19
1208 0xeddf9b1f, //vldr d25, [pc, #124]
Mike Klein894d5612017-03-07 07:59:52 -05001209 0xf2404c30, //vfma.f32 d20, d0, d16
Mike Klein5224f462017-03-07 17:29:54 -05001210 0xeddf2b1b, //vldr d18, [pc, #108]
1211 0xf2415c30, //vfma.f32 d21, d1, d16
1212 0xeddfcb1d, //vldr d28, [pc, #116]
1213 0xf2423c30, //vfma.f32 d19, d2, d16
Mike Klein894d5612017-03-07 07:59:52 -05001214 0xe4913004, //ldr r3, [r1], #4
Mike Klein5224f462017-03-07 17:29:54 -05001215 0xf3426d12, //vmul.f32 d22, d2, d2
1216 0xf3417d11, //vmul.f32 d23, d1, d1
1217 0xf3620e80, //vcgt.f32 d16, d18, d0
1218 0xf3621e81, //vcgt.f32 d17, d18, d1
1219 0xf341ad39, //vmul.f32 d26, d1, d25
1220 0xf342bd39, //vmul.f32 d27, d2, d25
1221 0xf3622e82, //vcgt.f32 d18, d18, d2
1222 0xf3409d39, //vmul.f32 d25, d0, d25
1223 0xf26cd1bc, //vorr d29, d28, d28
1224 0xf248dcb4, //vfma.f32 d29, d24, d20
1225 0xf26c41bc, //vorr d20, d28, d28
1226 0xf2474cb5, //vfma.f32 d20, d23, d21
1227 0xf246ccb3, //vfma.f32 d28, d22, d19
1228 0xf35901bd, //vbsl d16, d25, d29
1229 0xf35a11b4, //vbsl d17, d26, d20
1230 0xf35b21bc, //vbsl d18, d27, d28
1231 0xf22001b0, //vorr d0, d16, d16
1232 0xf22111b1, //vorr d1, d17, d17
1233 0xf22221b2, //vorr d2, d18, d18
Mike Klein894d5612017-03-07 07:59:52 -05001234 0xe12fff13, //bx r3
Mike Klein5224f462017-03-07 17:29:54 -05001235 0x3e99999a, //.word 0x3e99999a
1236 0x3e99999a, //.word 0x3e99999a
1237 0x3f328f5c, //.word 0x3f328f5c
1238 0x3f328f5c, //.word 0x3f328f5c
1239 0x3d6147ae, //.word 0x3d6147ae
1240 0x3d6147ae, //.word 0x3d6147ae
1241 0x3d9e8391, //.word 0x3d9e8391
1242 0x3d9e8391, //.word 0x3d9e8391
1243 0x3b23d70a, //.word 0x3b23d70a
1244 0x3b23d70a, //.word 0x3b23d70a
Mike Klein894d5612017-03-07 07:59:52 -05001245};
1246
1247CODE const uint32_t sk_to_srgb_vfp4[] = {
Mike Klein5224f462017-03-07 17:29:54 -05001248 0xf3fb0582, //vrsqrte.f32 d16, d2
1249 0xe4913004, //ldr r3, [r1], #4
Mike Klein894d5612017-03-07 07:59:52 -05001250 0xf3fb1581, //vrsqrte.f32 d17, d1
Mike Klein5224f462017-03-07 17:29:54 -05001251 0xf3fb2580, //vrsqrte.f32 d18, d0
Mike Klein894d5612017-03-07 07:59:52 -05001252 0xf3403db0, //vmul.f32 d19, d16, d16
1253 0xf3414db1, //vmul.f32 d20, d17, d17
1254 0xf3425db2, //vmul.f32 d21, d18, d18
Mike Klein5224f462017-03-07 17:29:54 -05001255 0xf2623f33, //vrsqrts.f32 d19, d2, d19
Mike Klein894d5612017-03-07 07:59:52 -05001256 0xf2614f34, //vrsqrts.f32 d20, d1, d20
Mike Klein5224f462017-03-07 17:29:54 -05001257 0xf2605f35, //vrsqrts.f32 d21, d0, d21
Mike Klein894d5612017-03-07 07:59:52 -05001258 0xf3400db3, //vmul.f32 d16, d16, d19
1259 0xf3411db4, //vmul.f32 d17, d17, d20
1260 0xf3422db5, //vmul.f32 d18, d18, d21
1261 0xf3fb3520, //vrecpe.f32 d19, d16
1262 0xf3fb4521, //vrecpe.f32 d20, d17
1263 0xf3fb6522, //vrecpe.f32 d22, d18
Mike Klein5224f462017-03-07 17:29:54 -05001264 0xf3fb55a0, //vrsqrte.f32 d21, d16
1265 0xf3fb75a1, //vrsqrte.f32 d23, d17
1266 0xf3fb85a2, //vrsqrte.f32 d24, d18
Mike Klein894d5612017-03-07 07:59:52 -05001267 0xf2409fb3, //vrecps.f32 d25, d16, d19
1268 0xf241afb4, //vrecps.f32 d26, d17, d20
1269 0xf242bfb6, //vrecps.f32 d27, d18, d22
1270 0xf345cdb5, //vmul.f32 d28, d21, d21
1271 0xf347ddb7, //vmul.f32 d29, d23, d23
1272 0xf348edb8, //vmul.f32 d30, d24, d24
Mike Klein5224f462017-03-07 17:29:54 -05001273 0xf2600fbc, //vrsqrts.f32 d16, d16, d28
1274 0xf2611fbd, //vrsqrts.f32 d17, d17, d29
1275 0xf2622fbe, //vrsqrts.f32 d18, d18, d30
Mike Klein894d5612017-03-07 07:59:52 -05001276 0xf3433db9, //vmul.f32 d19, d19, d25
Mike Klein5224f462017-03-07 17:29:54 -05001277 0xeddf9b21, //vldr d25, [pc, #132]
Mike Klein894d5612017-03-07 07:59:52 -05001278 0xf3444dba, //vmul.f32 d20, d20, d26
Mike Klein5224f462017-03-07 17:29:54 -05001279 0xeddfab21, //vldr d26, [pc, #132]
Mike Klein894d5612017-03-07 07:59:52 -05001280 0xf3466dbb, //vmul.f32 d22, d22, d27
Mike Klein894d5612017-03-07 07:59:52 -05001281 0xf26ab1ba, //vorr d27, d26, d26
Mike Klein5224f462017-03-07 17:29:54 -05001282 0xf243bcb9, //vfma.f32 d27, d19, d25
Mike Klein894d5612017-03-07 07:59:52 -05001283 0xf26a31ba, //vorr d19, d26, d26
Mike Klein5224f462017-03-07 17:29:54 -05001284 0xf2443cb9, //vfma.f32 d19, d20, d25
1285 0xeddf4b1d, //vldr d20, [pc, #116]
1286 0xf246acb9, //vfma.f32 d26, d22, d25
1287 0xf3450db0, //vmul.f32 d16, d21, d16
1288 0xeddf5b1c, //vldr d21, [pc, #112]
1289 0xf3471db1, //vmul.f32 d17, d23, d17
1290 0xf3482db2, //vmul.f32 d18, d24, d18
1291 0xf3406d35, //vmul.f32 d22, d0, d21
1292 0xf240bcb4, //vfma.f32 d27, d16, d20
1293 0xf2413cb4, //vfma.f32 d19, d17, d20
1294 0xf242acb4, //vfma.f32 d26, d18, d20
1295 0xeddf2b17, //vldr d18, [pc, #92]
1296 0xf3417d35, //vmul.f32 d23, d1, d21
1297 0xf3620e80, //vcgt.f32 d16, d18, d0
1298 0xf3621e81, //vcgt.f32 d17, d18, d1
1299 0xf3622e82, //vcgt.f32 d18, d18, d2
1300 0xf3425d35, //vmul.f32 d21, d2, d21
1301 0xf2c74f10, //vmov.f32 d20, #1
1302 0xf2648faa, //vmin.f32 d24, d20, d26
Mike Klein894d5612017-03-07 07:59:52 -05001303 0xf2643fa3, //vmin.f32 d19, d20, d19
Mike Klein5224f462017-03-07 17:29:54 -05001304 0xf2644fab, //vmin.f32 d20, d20, d27
1305 0xf35601b8, //vbsl d16, d22, d24
1306 0xf35711b3, //vbsl d17, d23, d19
1307 0xf35521b4, //vbsl d18, d21, d20
1308 0xf22001b0, //vorr d0, d16, d16
1309 0xf22111b1, //vorr d1, d17, d17
1310 0xf22221b2, //vorr d2, d18, d18
Mike Klein894d5612017-03-07 07:59:52 -05001311 0xe12fff13, //bx r3
Mike Klein5224f462017-03-07 17:29:54 -05001312 0x3f306fce, //.word 0x3f306fce
1313 0x3f306fce, //.word 0x3f306fce
1314 0xbdca57a8, //.word 0xbdca57a8
1315 0xbdca57a8, //.word 0xbdca57a8
1316 0x3ed287c2, //.word 0x3ed287c2
1317 0x3ed287c2, //.word 0x3ed287c2
1318 0x41475c29, //.word 0x41475c29
1319 0x41475c29, //.word 0x41475c29
1320 0x3b8ce704, //.word 0x3b8ce704
1321 0x3b8ce704, //.word 0x3b8ce704
Mike Klein894d5612017-03-07 07:59:52 -05001322};
1323
1324CODE const uint32_t sk_scale_1_float_vfp4[] = {
Mike Klein894d5612017-03-07 07:59:52 -05001325 0xe8911008, //ldm r1, {r3, ip}
1326 0xe2811008, //add r1, r1, #8
Mike Klein64b97482017-03-14 17:35:04 -07001327 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1328 0xf3000d90, //vmul.f32 d0, d16, d0
1329 0xf3001d91, //vmul.f32 d1, d16, d1
1330 0xf3002d92, //vmul.f32 d2, d16, d2
1331 0xf3003d93, //vmul.f32 d3, d16, d3
Mike Klein894d5612017-03-07 07:59:52 -05001332 0xe12fff1c, //bx ip
1333};
1334
1335CODE const uint32_t sk_scale_u8_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001336 0xe24dd004, //sub sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001337 0xe8911008, //ldm r1, {r3, ip}
1338 0xe2811008, //add r1, r1, #8
1339 0xe5933000, //ldr r3, [r3]
1340 0xe0833000, //add r3, r3, r0
1341 0xe1d330b0, //ldrh r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001342 0xe1cd30b0, //strh r3, [sp]
1343 0xe1a0300d, //mov r3, sp
Mike Klein894d5612017-03-07 07:59:52 -05001344 0xf4e3041f, //vld1.16 {d16[0]}, [r3 :16]
1345 0xf3c80a30, //vmovl.u8 q8, d16
1346 0xf3d00a30, //vmovl.u16 q8, d16
1347 0xf3fb06a0, //vcvt.f32.u32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001348 0xeddf1b06, //vldr d17, [pc, #24]
1349 0xf3400db1, //vmul.f32 d16, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001350 0xf3000d90, //vmul.f32 d0, d16, d0
1351 0xf3001d91, //vmul.f32 d1, d16, d1
1352 0xf3002d92, //vmul.f32 d2, d16, d2
1353 0xf3003d93, //vmul.f32 d3, d16, d3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001354 0xe28dd004, //add sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001355 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001356 0x3b808081, //.word 0x3b808081
1357 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001358};
1359
1360CODE const uint32_t sk_lerp_1_float_vfp4[] = {
1361 0xe8911008, //ldm r1, {r3, ip}
1362 0xf2600d04, //vsub.f32 d16, d0, d4
1363 0xf2611d05, //vsub.f32 d17, d1, d5
1364 0xf2622d06, //vsub.f32 d18, d2, d6
1365 0xe2811008, //add r1, r1, #8
1366 0xf2633d07, //vsub.f32 d19, d3, d7
1367 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1368 0xf2240114, //vorr d0, d4, d4
1369 0xf2251115, //vorr d1, d5, d5
1370 0xf2262116, //vorr d2, d6, d6
1371 0xf2273117, //vorr d3, d7, d7
1372 0xf2000cb4, //vfma.f32 d0, d16, d20
1373 0xf2011cb4, //vfma.f32 d1, d17, d20
1374 0xf2022cb4, //vfma.f32 d2, d18, d20
1375 0xf2033cb4, //vfma.f32 d3, d19, d20
1376 0xe12fff1c, //bx ip
1377};
1378
1379CODE const uint32_t sk_lerp_u8_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001380 0xe24dd004, //sub sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001381 0xe8911008, //ldm r1, {r3, ip}
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001382 0xf2602d04, //vsub.f32 d18, d0, d4
Mike Klein894d5612017-03-07 07:59:52 -05001383 0xf2623d06, //vsub.f32 d19, d2, d6
1384 0xf2634d07, //vsub.f32 d20, d3, d7
1385 0xe2811008, //add r1, r1, #8
1386 0xe5933000, //ldr r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001387 0xf2240114, //vorr d0, d4, d4
Mike Klein894d5612017-03-07 07:59:52 -05001388 0xf2262116, //vorr d2, d6, d6
1389 0xe0833000, //add r3, r3, r0
1390 0xf2273117, //vorr d3, d7, d7
1391 0xe1d330b0, //ldrh r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001392 0xe1cd30b0, //strh r3, [sp]
1393 0xe1a0300d, //mov r3, sp
Mike Klein894d5612017-03-07 07:59:52 -05001394 0xf4e3041f, //vld1.16 {d16[0]}, [r3 :16]
1395 0xf3c80a30, //vmovl.u8 q8, d16
1396 0xf3d00a30, //vmovl.u16 q8, d16
1397 0xf3fb06a0, //vcvt.f32.u32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001398 0xeddf1b08, //vldr d17, [pc, #32]
1399 0xf3400db1, //vmul.f32 d16, d16, d17
1400 0xf2611d05, //vsub.f32 d17, d1, d5
1401 0xf2251115, //vorr d1, d5, d5
1402 0xf2020cb0, //vfma.f32 d0, d18, d16
1403 0xf2011cb0, //vfma.f32 d1, d17, d16
Mike Klein894d5612017-03-07 07:59:52 -05001404 0xf2032cb0, //vfma.f32 d2, d19, d16
1405 0xf2043cb0, //vfma.f32 d3, d20, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001406 0xe28dd004, //add sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001407 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001408 0x3b808081, //.word 0x3b808081
1409 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001410};
1411
1412CODE const uint32_t sk_lerp_565_vfp4[] = {
Mike Klein5224f462017-03-07 17:29:54 -05001413 0xe24dd004, //sub sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001414 0xe8911008, //ldm r1, {r3, ip}
Mike Klein5224f462017-03-07 17:29:54 -05001415 0xf3c72218, //vmov.i32 d18, #63488
1416 0xf2c1101f, //vmov.i32 d17, #31
Mike Klein894d5612017-03-07 07:59:52 -05001417 0xf2603d04, //vsub.f32 d19, d0, d4
Mike Klein894d5612017-03-07 07:59:52 -05001418 0xe2811008, //add r1, r1, #8
1419 0xe5933000, //ldr r3, [r3]
Mike Klein5224f462017-03-07 17:29:54 -05001420 0xf2616d05, //vsub.f32 d22, d1, d5
1421 0xf2240114, //vorr d0, d4, d4
Mike Klein894d5612017-03-07 07:59:52 -05001422 0xf2251115, //vorr d1, d5, d5
Mike Klein5224f462017-03-07 17:29:54 -05001423 0xe7933080, //ldr r3, [r3, r0, lsl #1]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001424 0xf2873f10, //vmov.f32 d3, #1
Mike Klein5224f462017-03-07 17:29:54 -05001425 0xe58d3000, //str r3, [sp]
1426 0xe1a0300d, //mov r3, sp
1427 0xf4e3083f, //vld1.32 {d16[0]}, [r3 :32]
1428 0xe3a03e7e, //mov r3, #2016
1429 0xf3d04a30, //vmovl.u16 q10, d16
1430 0xee803b90, //vdup.32 d16, r3
1431 0xf24421b2, //vand d18, d20, d18
1432 0xf24411b1, //vand d17, d20, d17
1433 0xeddf5b12, //vldr d21, [pc, #72]
1434 0xf24401b0, //vand d16, d20, d16
1435 0xeddf4b0e, //vldr d20, [pc, #56]
1436 0xf3fb2622, //vcvt.f32.s32 d18, d18
1437 0xf3fb0620, //vcvt.f32.s32 d16, d16
1438 0xf3fb1621, //vcvt.f32.s32 d17, d17
1439 0xf3422db4, //vmul.f32 d18, d18, d20
1440 0xeddf4b0d, //vldr d20, [pc, #52]
1441 0xf3400db5, //vmul.f32 d16, d16, d21
1442 0xf2625d06, //vsub.f32 d21, d2, d6
1443 0xf3411db4, //vmul.f32 d17, d17, d20
1444 0xf2262116, //vorr d2, d6, d6
1445 0xf2030cb2, //vfma.f32 d0, d19, d18
1446 0xf2061cb0, //vfma.f32 d1, d22, d16
1447 0xf2052cb1, //vfma.f32 d2, d21, d17
1448 0xe28dd004, //add sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001449 0xe12fff1c, //bx ip
Mike Klein5224f462017-03-07 17:29:54 -05001450 0xe320f000, //nop {0}
1451 0x37842108, //.word 0x37842108
1452 0x37842108, //.word 0x37842108
1453 0x3a020821, //.word 0x3a020821
1454 0x3a020821, //.word 0x3a020821
1455 0x3d042108, //.word 0x3d042108
1456 0x3d042108, //.word 0x3d042108
Mike Klein894d5612017-03-07 07:59:52 -05001457};
1458
1459CODE const uint32_t sk_load_tables_vfp4[] = {
1460 0xe92d48f0, //push {r4, r5, r6, r7, fp, lr}
1461 0xe8911008, //ldm r1, {r3, ip}
Mike Klein5224f462017-03-07 17:29:54 -05001462 0xf3c7001f, //vmov.i32 d16, #255
Mike Klein894d5612017-03-07 07:59:52 -05001463 0xe2811008, //add r1, r1, #8
1464 0xe593e000, //ldr lr, [r3]
1465 0xe99300b0, //ldmib r3, {r4, r5, r7}
Mike Klein5224f462017-03-07 17:29:54 -05001466 0xe08e3100, //add r3, lr, r0, lsl #2
1467 0xedd31b00, //vldr d17, [r3]
1468 0xf24121b0, //vand d18, d17, d16
1469 0xf3f83031, //vshr.u32 d19, d17, #8
1470 0xee323b90, //vmov.32 r3, d18[1]
Mike Klein894d5612017-03-07 07:59:52 -05001471 0xee126b90, //vmov.32 r6, d18[0]
Mike Klein5224f462017-03-07 17:29:54 -05001472 0xf3f02031, //vshr.u32 d18, d17, #16
1473 0xf24221b0, //vand d18, d18, d16
1474 0xf24301b0, //vand d16, d19, d16
1475 0xe0843103, //add r3, r4, r3, lsl #2
1476 0xedd30a00, //vldr s1, [r3]
1477 0xe0843106, //add r3, r4, r6, lsl #2
1478 0xee326b90, //vmov.32 r6, d18[1]
1479 0xed930a00, //vldr s0, [r3]
1480 0xee303b90, //vmov.32 r3, d16[1]
1481 0xee104b90, //vmov.32 r4, d16[0]
Mike Klein894d5612017-03-07 07:59:52 -05001482 0xf3e80031, //vshr.u32 d16, d17, #24
Mike Klein5224f462017-03-07 17:29:54 -05001483 0xeddf1b0d, //vldr d17, [pc, #52]
Mike Klein894d5612017-03-07 07:59:52 -05001484 0xf3fb0620, //vcvt.f32.s32 d16, d16
Mike Klein5224f462017-03-07 17:29:54 -05001485 0xf3003db1, //vmul.f32 d3, d16, d17
1486 0xe087e106, //add lr, r7, r6, lsl #2
Mike Klein894d5612017-03-07 07:59:52 -05001487 0xee126b90, //vmov.32 r6, d18[0]
Mike Klein5224f462017-03-07 17:29:54 -05001488 0xe0853103, //add r3, r5, r3, lsl #2
1489 0xedde2a00, //vldr s5, [lr]
1490 0xedd31a00, //vldr s3, [r3]
1491 0xe0853104, //add r3, r5, r4, lsl #2
1492 0xed931a00, //vldr s2, [r3]
1493 0xe0873106, //add r3, r7, r6, lsl #2
1494 0xed932a00, //vldr s4, [r3]
Mike Klein894d5612017-03-07 07:59:52 -05001495 0xe8bd48f0, //pop {r4, r5, r6, r7, fp, lr}
1496 0xe12fff1c, //bx ip
Mike Klein5224f462017-03-07 17:29:54 -05001497 0xe320f000, //nop {0}
1498 0x3b808081, //.word 0x3b808081
1499 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001500};
1501
1502CODE const uint32_t sk_load_a8_vfp4[] = {
1503 0xe24dd004, //sub sp, sp, #4
1504 0xe8911008, //ldm r1, {r3, ip}
1505 0xe2811008, //add r1, r1, #8
Mike Klein5224f462017-03-07 17:29:54 -05001506 0xf2800010, //vmov.i32 d0, #0
Mike Klein894d5612017-03-07 07:59:52 -05001507 0xf2801010, //vmov.i32 d1, #0
Mike Klein894d5612017-03-07 07:59:52 -05001508 0xe5933000, //ldr r3, [r3]
Mike Klein5224f462017-03-07 17:29:54 -05001509 0xf2802010, //vmov.i32 d2, #0
Mike Klein894d5612017-03-07 07:59:52 -05001510 0xe0833000, //add r3, r3, r0
1511 0xe1d330b0, //ldrh r3, [r3]
1512 0xe1cd30b0, //strh r3, [sp]
1513 0xe1a0300d, //mov r3, sp
1514 0xf4e3041f, //vld1.16 {d16[0]}, [r3 :16]
Mike Klein894d5612017-03-07 07:59:52 -05001515 0xf3c80a30, //vmovl.u8 q8, d16
1516 0xf3d00a30, //vmovl.u16 q8, d16
1517 0xf3fb06a0, //vcvt.f32.u32 d16, d16
Mike Klein5224f462017-03-07 17:29:54 -05001518 0xeddf1b03, //vldr d17, [pc, #12]
1519 0xf3003db1, //vmul.f32 d3, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001520 0xe28dd004, //add sp, sp, #4
1521 0xe12fff1c, //bx ip
Mike Klein5224f462017-03-07 17:29:54 -05001522 0xe320f000, //nop {0}
1523 0x3b808081, //.word 0x3b808081
1524 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001525};
1526
1527CODE const uint32_t sk_store_a8_vfp4[] = {
1528 0xe92d4800, //push {fp, lr}
Mike Klein5224f462017-03-07 17:29:54 -05001529 0xeddf0b0d, //vldr d16, [pc, #52]
1530 0xf2c3161f, //vmov.i32 d17, #1056964608
1531 0xf2431c30, //vfma.f32 d17, d3, d16
Mike Klein894d5612017-03-07 07:59:52 -05001532 0xe5913000, //ldr r3, [r1]
Mike Klein894d5612017-03-07 07:59:52 -05001533 0xe5933000, //ldr r3, [r3]
Mike Klein5224f462017-03-07 17:29:54 -05001534 0xf3fb07a1, //vcvt.u32.f32 d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001535 0xee10eb90, //vmov.32 lr, d16[0]
1536 0xee30cb90, //vmov.32 ip, d16[1]
1537 0xe7e3e000, //strb lr, [r3, r0]!
1538 0xe5c3c001, //strb ip, [r3, #1]
Mike Klein5224f462017-03-07 17:29:54 -05001539 0xe2813008, //add r3, r1, #8
1540 0xe591c004, //ldr ip, [r1, #4]
1541 0xe1a01003, //mov r1, r3
Mike Klein894d5612017-03-07 07:59:52 -05001542 0xe8bd4800, //pop {fp, lr}
Mike Klein5224f462017-03-07 17:29:54 -05001543 0xe12fff1c, //bx ip
1544 0x437f0000, //.word 0x437f0000
1545 0x437f0000, //.word 0x437f0000
Mike Klein894d5612017-03-07 07:59:52 -05001546};
1547
1548CODE const uint32_t sk_load_565_vfp4[] = {
1549 0xe24dd004, //sub sp, sp, #4
1550 0xe8911008, //ldm r1, {r3, ip}
Mike Klein5224f462017-03-07 17:29:54 -05001551 0xf2c1101f, //vmov.i32 d17, #31
1552 0xf3c72218, //vmov.i32 d18, #63488
1553 0xeddf3b16, //vldr d19, [pc, #88]
Mike Klein894d5612017-03-07 07:59:52 -05001554 0xe2811008, //add r1, r1, #8
1555 0xe5933000, //ldr r3, [r3]
Mike Klein5224f462017-03-07 17:29:54 -05001556 0xf2873f10, //vmov.f32 d3, #1
Mike Klein894d5612017-03-07 07:59:52 -05001557 0xe7933080, //ldr r3, [r3, r0, lsl #1]
1558 0xe58d3000, //str r3, [sp]
1559 0xe1a0300d, //mov r3, sp
1560 0xf4e3083f, //vld1.32 {d16[0]}, [r3 :32]
Mike Klein5224f462017-03-07 17:29:54 -05001561 0xe3a03e7e, //mov r3, #2016
Mike Klein894d5612017-03-07 07:59:52 -05001562 0xf3d04a30, //vmovl.u16 q10, d16
Mike Klein5224f462017-03-07 17:29:54 -05001563 0xee803b90, //vdup.32 d16, r3
1564 0xf24411b1, //vand d17, d20, d17
1565 0xeddf5b0e, //vldr d21, [pc, #56]
1566 0xf24421b2, //vand d18, d20, d18
1567 0xf24401b0, //vand d16, d20, d16
1568 0xeddf4b09, //vldr d20, [pc, #36]
1569 0xf3fb2622, //vcvt.f32.s32 d18, d18
Mike Klein894d5612017-03-07 07:59:52 -05001570 0xf3fb0620, //vcvt.f32.s32 d16, d16
1571 0xf3fb1621, //vcvt.f32.s32 d17, d17
Mike Klein5224f462017-03-07 17:29:54 -05001572 0xf3020db3, //vmul.f32 d0, d18, d19
1573 0xf3001db4, //vmul.f32 d1, d16, d20
1574 0xf3012db5, //vmul.f32 d2, d17, d21
Mike Klein894d5612017-03-07 07:59:52 -05001575 0xe28dd004, //add sp, sp, #4
1576 0xe12fff1c, //bx ip
Mike Klein5224f462017-03-07 17:29:54 -05001577 0x37842108, //.word 0x37842108
1578 0x37842108, //.word 0x37842108
1579 0x3a020821, //.word 0x3a020821
1580 0x3a020821, //.word 0x3a020821
1581 0x3d042108, //.word 0x3d042108
1582 0x3d042108, //.word 0x3d042108
Mike Klein894d5612017-03-07 07:59:52 -05001583};
1584
1585CODE const uint32_t sk_store_565_vfp4[] = {
Mike Klein5224f462017-03-07 17:29:54 -05001586 0xf2c30f1f, //vmov.f32 d16, #31
1587 0xeddf1b15, //vldr d17, [pc, #84]
Mike Klein894d5612017-03-07 07:59:52 -05001588 0xf2c3361f, //vmov.i32 d19, #1056964608
Mike Klein894d5612017-03-07 07:59:52 -05001589 0xe5913000, //ldr r3, [r1]
Mike Klein5224f462017-03-07 17:29:54 -05001590 0xf2413c31, //vfma.f32 d19, d1, d17
1591 0xf2c3161f, //vmov.i32 d17, #1056964608
1592 0xf2401c30, //vfma.f32 d17, d0, d16
Mike Klein894d5612017-03-07 07:59:52 -05001593 0xe5933000, //ldr r3, [r3]
Mike Klein5224f462017-03-07 17:29:54 -05001594 0xf2c3261f, //vmov.i32 d18, #1056964608
1595 0xf2422c30, //vfma.f32 d18, d2, d16
Mike Klein894d5612017-03-07 07:59:52 -05001596 0xe0833080, //add r3, r3, r0, lsl #1
Mike Klein5224f462017-03-07 17:29:54 -05001597 0xf3fb07a3, //vcvt.u32.f32 d16, d19
1598 0xf3fb17a1, //vcvt.u32.f32 d17, d17
1599 0xf3fb27a2, //vcvt.u32.f32 d18, d18
1600 0xf2e50530, //vshl.s32 d16, d16, #5
Mike Klein894d5612017-03-07 07:59:52 -05001601 0xf2eb1531, //vshl.s32 d17, d17, #11
Mike Klein5224f462017-03-07 17:29:54 -05001602 0xf26001b1, //vorr d16, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001603 0xf26001b2, //vorr d16, d16, d18
1604 0xf3f60121, //vuzp.16 d16, d17
1605 0xf4c3080f, //vst1.32 {d16[0]}, [r3]
Mike Klein5224f462017-03-07 17:29:54 -05001606 0xe2813008, //add r3, r1, #8
1607 0xe591c004, //ldr ip, [r1, #4]
1608 0xe1a01003, //mov r1, r3
1609 0xe12fff1c, //bx ip
1610 0x427c0000, //.word 0x427c0000
1611 0x427c0000, //.word 0x427c0000
Mike Klein894d5612017-03-07 07:59:52 -05001612};
1613
1614CODE const uint32_t sk_load_8888_vfp4[] = {
Mike Klein894d5612017-03-07 07:59:52 -05001615 0xe8911008, //ldm r1, {r3, ip}
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001616 0xf3c7001f, //vmov.i32 d16, #255
Mike Klein894d5612017-03-07 07:59:52 -05001617 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001618 0xe5933000, //ldr r3, [r3]
1619 0xe0833100, //add r3, r3, r0, lsl #2
Mike Klein894d5612017-03-07 07:59:52 -05001620 0xedd31b00, //vldr d17, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001621 0xf24121b0, //vand d18, d17, d16
Mike Klein894d5612017-03-07 07:59:52 -05001622 0xf3f83031, //vshr.u32 d19, d17, #8
1623 0xf3e84031, //vshr.u32 d20, d17, #24
1624 0xf3f01031, //vshr.u32 d17, d17, #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001625 0xf24331b0, //vand d19, d19, d16
1626 0xf24101b0, //vand d16, d17, d16
1627 0xeddf1b08, //vldr d17, [pc, #32]
Mike Klein894d5612017-03-07 07:59:52 -05001628 0xf3fb2622, //vcvt.f32.s32 d18, d18
1629 0xf3fb4624, //vcvt.f32.s32 d20, d20
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001630 0xf3fb3623, //vcvt.f32.s32 d19, d19
Mike Klein894d5612017-03-07 07:59:52 -05001631 0xf3fb0620, //vcvt.f32.s32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001632 0xf3020db1, //vmul.f32 d0, d18, d17
1633 0xf3043db1, //vmul.f32 d3, d20, d17
1634 0xf3031db1, //vmul.f32 d1, d19, d17
1635 0xf3002db1, //vmul.f32 d2, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001636 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001637 0x3b808081, //.word 0x3b808081
1638 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001639};
1640
1641CODE const uint32_t sk_store_8888_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001642 0xeddf0b1a, //vldr d16, [pc, #104]
Mike Klein894d5612017-03-07 07:59:52 -05001643 0xf2c3261f, //vmov.i32 d18, #1056964608
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001644 0xf2412c30, //vfma.f32 d18, d1, d16
Mike Klein894d5612017-03-07 07:59:52 -05001645 0xe5913000, //ldr r3, [r1]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001646 0xf2c3361f, //vmov.i32 d19, #1056964608
1647 0xf2c3161f, //vmov.i32 d17, #1056964608
1648 0xf2423c30, //vfma.f32 d19, d2, d16
Mike Klein894d5612017-03-07 07:59:52 -05001649 0xe5933000, //ldr r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001650 0xf2c3461f, //vmov.i32 d20, #1056964608
1651 0xf2401c30, //vfma.f32 d17, d0, d16
Mike Klein894d5612017-03-07 07:59:52 -05001652 0xe0833100, //add r3, r3, r0, lsl #2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001653 0xf2434c30, //vfma.f32 d20, d3, d16
1654 0xf3fb07a2, //vcvt.u32.f32 d16, d18
Mike Klein894d5612017-03-07 07:59:52 -05001655 0xf3fb27a3, //vcvt.u32.f32 d18, d19
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001656 0xf3fb17a1, //vcvt.u32.f32 d17, d17
Mike Klein894d5612017-03-07 07:59:52 -05001657 0xf3fb37a4, //vcvt.u32.f32 d19, d20
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001658 0xf2e80530, //vshl.s32 d16, d16, #8
Mike Klein894d5612017-03-07 07:59:52 -05001659 0xf2f02532, //vshl.s32 d18, d18, #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001660 0xf26001b1, //vorr d16, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001661 0xf2f81533, //vshl.s32 d17, d19, #24
1662 0xf26001b2, //vorr d16, d16, d18
1663 0xf26001b1, //vorr d16, d16, d17
1664 0xedc30b00, //vstr d16, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001665 0xe2813008, //add r3, r1, #8
1666 0xe591c004, //ldr ip, [r1, #4]
1667 0xe1a01003, //mov r1, r3
1668 0xe12fff1c, //bx ip
1669 0xe320f000, //nop {0}
1670 0x437f0000, //.word 0x437f0000
1671 0x437f0000, //.word 0x437f0000
Mike Klein894d5612017-03-07 07:59:52 -05001672};
1673
1674CODE const uint32_t sk_load_f16_vfp4[] = {
Mike Klein894d5612017-03-07 07:59:52 -05001675 0xe8911008, //ldm r1, {r3, ip}
1676 0xe2811008, //add r1, r1, #8
1677 0xe5933000, //ldr r3, [r3]
1678 0xe0833180, //add r3, r3, r0, lsl #3
1679 0xf463084f, //vld2.16 {d16-d17}, [r3]
Mike Klein64b97482017-03-14 17:35:04 -07001680 0xf3f62720, //vcvt.f32.f16 q9, d16
1681 0xf3f60721, //vcvt.f32.f16 q8, d17
1682 0xf22201b2, //vorr d0, d18, d18
1683 0xf22011b0, //vorr d1, d16, d16
1684 0xf3ba00a3, //vtrn.32 d0, d19
1685 0xf22321b3, //vorr d2, d19, d19
1686 0xf3ba10a1, //vtrn.32 d1, d17
1687 0xf22131b1, //vorr d3, d17, d17
Mike Klein894d5612017-03-07 07:59:52 -05001688 0xe12fff1c, //bx ip
1689};
1690
1691CODE const uint32_t sk_store_f16_vfp4[] = {
1692 0xeef00b41, //vmov.f64 d16, d1
1693 0xeef03b42, //vmov.f64 d19, d2
1694 0xf2631113, //vorr d17, d3, d3
1695 0xf2602110, //vorr d18, d0, d0
1696 0xf3fa00a1, //vtrn.32 d16, d17
1697 0xf3f61620, //vcvt.f16.f32 d17, q8
1698 0xf3fa20a3, //vtrn.32 d18, d19
1699 0xe5913000, //ldr r3, [r1]
1700 0xf3f60622, //vcvt.f16.f32 d16, q9
1701 0xe5933000, //ldr r3, [r3]
1702 0xe0833180, //add r3, r3, r0, lsl #3
1703 0xf443084f, //vst2.16 {d16-d17}, [r3]
1704 0xe2813008, //add r3, r1, #8
1705 0xe591c004, //ldr ip, [r1, #4]
1706 0xe1a01003, //mov r1, r3
1707 0xe12fff1c, //bx ip
1708};
1709
1710CODE const uint32_t sk_store_f32_vfp4[] = {
1711 0xe5913000, //ldr r3, [r1]
1712 0xe5933000, //ldr r3, [r3]
1713 0xe0833200, //add r3, r3, r0, lsl #4
1714 0xf403008f, //vst4.32 {d0-d3}, [r3]
1715 0xe2813008, //add r3, r1, #8
1716 0xe591c004, //ldr ip, [r1, #4]
1717 0xe1a01003, //mov r1, r3
1718 0xe12fff1c, //bx ip
1719};
1720
1721CODE const uint32_t sk_clamp_x_vfp4[] = {
1722 0xe8911008, //ldm r1, {r3, ip}
1723 0xf2c00010, //vmov.i32 d16, #0
1724 0xf3c71e1f, //vmov.i8 d17, #255
1725 0xf2400f80, //vmax.f32 d16, d16, d0
1726 0xe2811008, //add r1, r1, #8
1727 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1728 0xf26218a1, //vadd.i32 d17, d18, d17
1729 0xf2200fa1, //vmin.f32 d0, d16, d17
1730 0xe12fff1c, //bx ip
1731};
1732
1733CODE const uint32_t sk_clamp_y_vfp4[] = {
1734 0xe8911008, //ldm r1, {r3, ip}
1735 0xf2c00010, //vmov.i32 d16, #0
1736 0xf3c71e1f, //vmov.i8 d17, #255
1737 0xf2400f81, //vmax.f32 d16, d16, d1
1738 0xe2811008, //add r1, r1, #8
1739 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1740 0xf26218a1, //vadd.i32 d17, d18, d17
1741 0xf2201fa1, //vmin.f32 d1, d16, d17
1742 0xe12fff1c, //bx ip
1743};
1744
1745CODE const uint32_t sk_repeat_x_vfp4[] = {
1746 0xed2d8b04, //vpush {d8-d9}
1747 0xe8911008, //ldm r1, {r3, ip}
1748 0xf2c02010, //vmov.i32 d18, #0
Mike Klein894d5612017-03-07 07:59:52 -05001749 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001750 0xeddf3b10, //vldr d19, [pc, #64]
Mike Klein894d5612017-03-07 07:59:52 -05001751 0xed938a00, //vldr s16, [r3]
1752 0xeec09a88, //vdiv.f32 s19, s1, s16
1753 0xee809a08, //vdiv.f32 s18, s0, s16
1754 0xf3fb0709, //vcvt.s32.f32 d16, d9
1755 0xf3fb0620, //vcvt.f32.s32 d16, d16
1756 0xf3601e89, //vcgt.f32 d17, d16, d9
1757 0xf35311b2, //vbsl d17, d19, d18
1758 0xf3f42c08, //vdup.32 d18, d8[0]
1759 0xf2600da1, //vsub.f32 d16, d16, d17
1760 0xf3c71e1f, //vmov.i8 d17, #255
1761 0xf26218a1, //vadd.i32 d17, d18, d17
1762 0xf2e009c8, //vmul.f32 d16, d16, d8[0]
1763 0xf2600d20, //vsub.f32 d16, d0, d16
1764 0xf2200fa1, //vmin.f32 d0, d16, d17
1765 0xecbd8b04, //vpop {d8-d9}
1766 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001767 0xe320f000, //nop {0}
1768 0x3f800000, //.word 0x3f800000
1769 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001770};
1771
1772CODE const uint32_t sk_repeat_y_vfp4[] = {
1773 0xed2d8b04, //vpush {d8-d9}
1774 0xe8911008, //ldm r1, {r3, ip}
1775 0xf2c02010, //vmov.i32 d18, #0
Mike Klein894d5612017-03-07 07:59:52 -05001776 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001777 0xeddf3b10, //vldr d19, [pc, #64]
Mike Klein894d5612017-03-07 07:59:52 -05001778 0xed938a00, //vldr s16, [r3]
1779 0xeec19a88, //vdiv.f32 s19, s3, s16
1780 0xee819a08, //vdiv.f32 s18, s2, s16
1781 0xf3fb0709, //vcvt.s32.f32 d16, d9
1782 0xf3fb0620, //vcvt.f32.s32 d16, d16
1783 0xf3601e89, //vcgt.f32 d17, d16, d9
1784 0xf35311b2, //vbsl d17, d19, d18
1785 0xf3f42c08, //vdup.32 d18, d8[0]
1786 0xf2600da1, //vsub.f32 d16, d16, d17
1787 0xf3c71e1f, //vmov.i8 d17, #255
1788 0xf26218a1, //vadd.i32 d17, d18, d17
1789 0xf2e009c8, //vmul.f32 d16, d16, d8[0]
1790 0xf2610d20, //vsub.f32 d16, d1, d16
1791 0xf2201fa1, //vmin.f32 d1, d16, d17
1792 0xecbd8b04, //vpop {d8-d9}
1793 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001794 0xe320f000, //nop {0}
1795 0x3f800000, //.word 0x3f800000
1796 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001797};
1798
1799CODE const uint32_t sk_mirror_x_vfp4[] = {
1800 0xed2d8b04, //vpush {d8-d9}
1801 0xe8911008, //ldm r1, {r3, ip}
1802 0xf2c03010, //vmov.i32 d19, #0
Mike Klein894d5612017-03-07 07:59:52 -05001803 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001804 0xeddf4b14, //vldr d20, [pc, #80]
Mike Klein894d5612017-03-07 07:59:52 -05001805 0xed938a00, //vldr s16, [r3]
1806 0xee389a08, //vadd.f32 s18, s16, s16
1807 0xf3f40c08, //vdup.32 d16, d8[0]
1808 0xf2200d20, //vsub.f32 d0, d0, d16
1809 0xeec08a89, //vdiv.f32 s17, s1, s18
1810 0xee808a09, //vdiv.f32 s16, s0, s18
1811 0xf3fb1708, //vcvt.s32.f32 d17, d8
1812 0xf3fb1621, //vcvt.f32.s32 d17, d17
1813 0xf3612e88, //vcgt.f32 d18, d17, d8
1814 0xf35421b3, //vbsl d18, d20, d19
1815 0xf2611da2, //vsub.f32 d17, d17, d18
1816 0xf3c72e1f, //vmov.i8 d18, #255
1817 0xf2e119c9, //vmul.f32 d17, d17, d9[0]
1818 0xf2601d21, //vsub.f32 d17, d0, d17
1819 0xf2611da0, //vsub.f32 d17, d17, d16
1820 0xf26008a2, //vadd.i32 d16, d16, d18
1821 0xf3f91721, //vabs.f32 d17, d17
1822 0xf2210fa0, //vmin.f32 d0, d17, d16
1823 0xecbd8b04, //vpop {d8-d9}
1824 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001825 0xe320f000, //nop {0}
1826 0x3f800000, //.word 0x3f800000
1827 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001828};
1829
1830CODE const uint32_t sk_mirror_y_vfp4[] = {
1831 0xed2d8b04, //vpush {d8-d9}
1832 0xe8911008, //ldm r1, {r3, ip}
1833 0xf2c03010, //vmov.i32 d19, #0
Mike Klein894d5612017-03-07 07:59:52 -05001834 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001835 0xeddf4b14, //vldr d20, [pc, #80]
Mike Klein894d5612017-03-07 07:59:52 -05001836 0xed938a00, //vldr s16, [r3]
1837 0xee389a08, //vadd.f32 s18, s16, s16
1838 0xf3f40c08, //vdup.32 d16, d8[0]
1839 0xf2211d20, //vsub.f32 d1, d1, d16
1840 0xeec18a89, //vdiv.f32 s17, s3, s18
1841 0xee818a09, //vdiv.f32 s16, s2, s18
1842 0xf3fb1708, //vcvt.s32.f32 d17, d8
1843 0xf3fb1621, //vcvt.f32.s32 d17, d17
1844 0xf3612e88, //vcgt.f32 d18, d17, d8
1845 0xf35421b3, //vbsl d18, d20, d19
1846 0xf2611da2, //vsub.f32 d17, d17, d18
1847 0xf3c72e1f, //vmov.i8 d18, #255
1848 0xf2e119c9, //vmul.f32 d17, d17, d9[0]
1849 0xf2611d21, //vsub.f32 d17, d1, d17
1850 0xf2611da0, //vsub.f32 d17, d17, d16
1851 0xf26008a2, //vadd.i32 d16, d16, d18
1852 0xf3f91721, //vabs.f32 d17, d17
1853 0xf2211fa0, //vmin.f32 d1, d17, d16
1854 0xecbd8b04, //vpop {d8-d9}
1855 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001856 0xe320f000, //nop {0}
1857 0x3f800000, //.word 0x3f800000
1858 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001859};
1860
Mike Kleine9ed07d2017-03-07 12:28:11 -05001861CODE const uint32_t sk_luminance_to_alpha_vfp4[] = {
Mike Klein5224f462017-03-07 17:29:54 -05001862 0xeddf0b0a, //vldr d16, [pc, #40]
1863 0xeddf1b0b, //vldr d17, [pc, #44]
1864 0xf3410d30, //vmul.f32 d16, d1, d16
1865 0xe4913004, //ldr r3, [r1], #4
1866 0xf3401d31, //vmul.f32 d17, d0, d17
Mike Kleine9ed07d2017-03-07 12:28:11 -05001867 0xf2800010, //vmov.i32 d0, #0
1868 0xf2801010, //vmov.i32 d1, #0
1869 0xf2013da0, //vadd.f32 d3, d17, d16
Mike Klein5224f462017-03-07 17:29:54 -05001870 0xeddf0b06, //vldr d16, [pc, #24]
1871 0xf2023c30, //vfma.f32 d3, d2, d16
Mike Kleine9ed07d2017-03-07 12:28:11 -05001872 0xf2802010, //vmov.i32 d2, #0
Mike Kleine9ed07d2017-03-07 12:28:11 -05001873 0xe12fff13, //bx r3
Mike Klein5224f462017-03-07 17:29:54 -05001874 0x3f371759, //.word 0x3f371759
1875 0x3f371759, //.word 0x3f371759
1876 0x3e59b3d0, //.word 0x3e59b3d0
1877 0x3e59b3d0, //.word 0x3e59b3d0
1878 0x3d93dd98, //.word 0x3d93dd98
1879 0x3d93dd98, //.word 0x3d93dd98
Mike Kleine9ed07d2017-03-07 12:28:11 -05001880};
1881
Mike Klein894d5612017-03-07 07:59:52 -05001882CODE const uint32_t sk_matrix_2x3_vfp4[] = {
1883 0xe92d4800, //push {fp, lr}
1884 0xe591e000, //ldr lr, [r1]
1885 0xe591c004, //ldr ip, [r1, #4]
1886 0xe2811008, //add r1, r1, #8
1887 0xe28e300c, //add r3, lr, #12
1888 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1889 0xe28e3008, //add r3, lr, #8
1890 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1891 0xe28e3010, //add r3, lr, #16
1892 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1893 0xe28e3014, //add r3, lr, #20
1894 0xf2410c31, //vfma.f32 d16, d1, d17
1895 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
Mike Klein894d5612017-03-07 07:59:52 -05001896 0xf2411c32, //vfma.f32 d17, d1, d18
Mike Klein64b97482017-03-14 17:35:04 -07001897 0xf4ee2c9d, //vld1.32 {d18[]}, [lr :32]!
1898 0xf4ee3c9f, //vld1.32 {d19[]}, [lr :32]
Mike Klein894d5612017-03-07 07:59:52 -05001899 0xf2400c32, //vfma.f32 d16, d0, d18
1900 0xf2401c33, //vfma.f32 d17, d0, d19
1901 0xf22001b0, //vorr d0, d16, d16
1902 0xf22111b1, //vorr d1, d17, d17
1903 0xe8bd4800, //pop {fp, lr}
1904 0xe12fff1c, //bx ip
1905};
1906
1907CODE const uint32_t sk_matrix_3x4_vfp4[] = {
1908 0xe92d4800, //push {fp, lr}
1909 0xe591e000, //ldr lr, [r1]
1910 0xe591c004, //ldr ip, [r1, #4]
1911 0xe2811008, //add r1, r1, #8
1912 0xe28e3020, //add r3, lr, #32
1913 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1914 0xe28e302c, //add r3, lr, #44
1915 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1916 0xe28e301c, //add r3, lr, #28
1917 0xf2420c33, //vfma.f32 d16, d2, d19
1918 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1919 0xe28e3018, //add r3, lr, #24
1920 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1921 0xe28e3024, //add r3, lr, #36
1922 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1923 0xe28e3028, //add r3, lr, #40
1924 0xf2421c32, //vfma.f32 d17, d2, d18
1925 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1926 0xe28e3010, //add r3, lr, #16
1927 0xf2422c34, //vfma.f32 d18, d2, d20
1928 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1929 0xe28e300c, //add r3, lr, #12
1930 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1931 0xe28e3014, //add r3, lr, #20
1932 0xf2411c34, //vfma.f32 d17, d1, d20
1933 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1934 0xf2410c34, //vfma.f32 d16, d1, d20
Mike Klein894d5612017-03-07 07:59:52 -05001935 0xe28e3008, //add r3, lr, #8
Mike Klein64b97482017-03-14 17:35:04 -07001936 0xf2412c33, //vfma.f32 d18, d1, d19
1937 0xf4ee3c9d, //vld1.32 {d19[]}, [lr :32]!
1938 0xf4ee4c9f, //vld1.32 {d20[]}, [lr :32]
Mike Klein894d5612017-03-07 07:59:52 -05001939 0xf2401c33, //vfma.f32 d17, d0, d19
1940 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1941 0xf2400c33, //vfma.f32 d16, d0, d19
1942 0xf2402c34, //vfma.f32 d18, d0, d20
1943 0xf22101b1, //vorr d0, d17, d17
1944 0xf22021b0, //vorr d2, d16, d16
1945 0xf22211b2, //vorr d1, d18, d18
1946 0xe8bd4800, //pop {fp, lr}
1947 0xe12fff1c, //bx ip
1948};
1949
Mike Kleine9ed07d2017-03-07 12:28:11 -05001950CODE const uint32_t sk_matrix_4x5_vfp4[] = {
Mike Klein64b97482017-03-14 17:35:04 -07001951 0xe92d4010, //push {r4, lr}
1952 0xe8911008, //ldm r1, {r3, ip}
1953 0xf2620112, //vorr d16, d2, d2
Mike Kleine9ed07d2017-03-07 12:28:11 -05001954 0xe2811008, //add r1, r1, #8
Mike Klein64b97482017-03-14 17:35:04 -07001955 0xe2834014, //add r4, r3, #20
1956 0xe1a0e003, //mov lr, r3
1957 0xf4e45c9f, //vld1.32 {d21[]}, [r4 :32]
1958 0xe2834028, //add r4, r3, #40
1959 0xf4e46c9f, //vld1.32 {d22[]}, [r4 :32]
1960 0xe2834038, //add r4, r3, #56
1961 0xf4e47c9f, //vld1.32 {d23[]}, [r4 :32]
1962 0xe2834048, //add r4, r3, #72
1963 0xf4a42c9f, //vld1.32 {d2[]}, [r4 :32]
1964 0xe2834034, //add r4, r3, #52
1965 0xf2032c37, //vfma.f32 d2, d3, d23
1966 0xf4e48c9f, //vld1.32 {d24[]}, [r4 :32]
1967 0xe2834044, //add r4, r3, #68
1968 0xf4e41c9f, //vld1.32 {d17[]}, [r4 :32]
1969 0xe2834030, //add r4, r3, #48
1970 0xf2431c38, //vfma.f32 d17, d3, d24
1971 0xf4e49c9f, //vld1.32 {d25[]}, [r4 :32]
1972 0xe283403c, //add r4, r3, #60
1973 0xf4e43c9f, //vld1.32 {d19[]}, [r4 :32]
1974 0xe283404c, //add r4, r3, #76
1975 0xf2002cb6, //vfma.f32 d2, d16, d22
1976 0xf4e42c9f, //vld1.32 {d18[]}, [r4 :32]
1977 0xe2834040, //add r4, r3, #64
1978 0xf2432c33, //vfma.f32 d18, d3, d19
1979 0xf4e43c9f, //vld1.32 {d19[]}, [r4 :32]
1980 0xe2834020, //add r4, r3, #32
1981 0xf2433c39, //vfma.f32 d19, d3, d25
1982 0xf4e47c9f, //vld1.32 {d23[]}, [r4 :32]
1983 0xe283402c, //add r4, r3, #44
1984 0xf4e48c9f, //vld1.32 {d24[]}, [r4 :32]
1985 0xe2834024, //add r4, r3, #36
1986 0xf2402cb8, //vfma.f32 d18, d16, d24
1987 0xf4e48c9f, //vld1.32 {d24[]}, [r4 :32]
1988 0xf2401cb8, //vfma.f32 d17, d16, d24
1989 0xe2834010, //add r4, r3, #16
1990 0xf2403cb7, //vfma.f32 d19, d16, d23
1991 0xf4ee4c9d, //vld1.32 {d20[]}, [lr :32]!
1992 0xf4e40c9f, //vld1.32 {d16[]}, [r4 :32]
1993 0xe283401c, //add r4, r3, #28
1994 0xf4e46c9f, //vld1.32 {d22[]}, [r4 :32]
1995 0xe2834018, //add r4, r3, #24
1996 0xf2412c36, //vfma.f32 d18, d1, d22
1997 0xf2411c35, //vfma.f32 d17, d1, d21
1998 0xf4ee5c9f, //vld1.32 {d21[]}, [lr :32]
Mike Kleine9ed07d2017-03-07 12:28:11 -05001999 0xf2413c30, //vfma.f32 d19, d1, d16
Mike Klein64b97482017-03-14 17:35:04 -07002000 0xf4e40c9f, //vld1.32 {d16[]}, [r4 :32]
2001 0xe2834008, //add r4, r3, #8
2002 0xe283300c, //add r3, r3, #12
2003 0xf2012c30, //vfma.f32 d2, d1, d16
2004 0xf4e40c9f, //vld1.32 {d16[]}, [r4 :32]
2005 0xf2401c35, //vfma.f32 d17, d0, d21
2006 0xf2403c34, //vfma.f32 d19, d0, d20
2007 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
Mike Kleine9ed07d2017-03-07 12:28:11 -05002008 0xf2402c34, //vfma.f32 d18, d0, d20
2009 0xf2002c30, //vfma.f32 d2, d0, d16
Mike Klein64b97482017-03-14 17:35:04 -07002010 0xf22111b1, //vorr d1, d17, d17
2011 0xf22301b3, //vorr d0, d19, d19
2012 0xf22231b2, //vorr d3, d18, d18
2013 0xe8bd4010, //pop {r4, lr}
Mike Kleine9ed07d2017-03-07 12:28:11 -05002014 0xe12fff1c, //bx ip
2015};
2016
Mike Klein894d5612017-03-07 07:59:52 -05002017CODE const uint32_t sk_matrix_perspective_vfp4[] = {
Mike Klein64b97482017-03-14 17:35:04 -07002018 0xe92d4010, //push {r4, lr}
Mike Klein894d5612017-03-07 07:59:52 -05002019 0xe591e000, //ldr lr, [r1]
2020 0xe591c004, //ldr ip, [r1, #4]
2021 0xe2811008, //add r1, r1, #8
2022 0xe28e301c, //add r3, lr, #28
Mike Klein64b97482017-03-14 17:35:04 -07002023 0xe28e4010, //add r4, lr, #16
Mike Klein894d5612017-03-07 07:59:52 -05002024 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
2025 0xe28e3020, //add r3, lr, #32
2026 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
2027 0xe28e3018, //add r3, lr, #24
2028 0xf2411c30, //vfma.f32 d17, d1, d16
2029 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
Mike Klein64b97482017-03-14 17:35:04 -07002030 0xe1a0300e, //mov r3, lr
2031 0xf4e42c9f, //vld1.32 {d18[]}, [r4 :32]
2032 0xe28e4008, //add r4, lr, #8
2033 0xf4e43c9f, //vld1.32 {d19[]}, [r4 :32]
Mike Klein894d5612017-03-07 07:59:52 -05002034 0xf2401c30, //vfma.f32 d17, d0, d16
Mike Klein64b97482017-03-14 17:35:04 -07002035 0xf4e30c9d, //vld1.32 {d16[]}, [r3 :32]!
Mike Klein894d5612017-03-07 07:59:52 -05002036 0xf4e35c9f, //vld1.32 {d21[]}, [r3 :32]
Mike Klein64b97482017-03-14 17:35:04 -07002037 0xe28e3014, //add r3, lr, #20
2038 0xf2413c35, //vfma.f32 d19, d1, d21
2039 0xf4e35c9f, //vld1.32 {d21[]}, [r3 :32]
2040 0xe28e300c, //add r3, lr, #12
2041 0xf2415c32, //vfma.f32 d21, d1, d18
2042 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
2043 0xf3fb4521, //vrecpe.f32 d20, d17
2044 0xf2403c30, //vfma.f32 d19, d0, d16
2045 0xf2411fb4, //vrecps.f32 d17, d17, d20
2046 0xf2405c32, //vfma.f32 d21, d0, d18
2047 0xf3440db1, //vmul.f32 d16, d20, d17
2048 0xf3030db0, //vmul.f32 d0, d19, d16
2049 0xf3051db0, //vmul.f32 d1, d21, d16
2050 0xe8bd4010, //pop {r4, lr}
Mike Klein894d5612017-03-07 07:59:52 -05002051 0xe12fff1c, //bx ip
2052};
2053
2054CODE const uint32_t sk_linear_gradient_2stops_vfp4[] = {
2055 0xe8911008, //ldm r1, {r3, ip}
2056 0xe2811008, //add r1, r1, #8
2057 0xf4632a0d, //vld1.8 {d18-d19}, [r3]!
2058 0xf4634a0f, //vld1.8 {d20-d21}, [r3]
2059 0xf3f40c22, //vdup.32 d16, d18[0]
2060 0xf3f41c24, //vdup.32 d17, d20[0]
2061 0xf2400c31, //vfma.f32 d16, d0, d17
2062 0xf3fc6c24, //vdup.32 d22, d20[1]
2063 0xf3bc1c22, //vdup.32 d1, d18[1]
2064 0xf3b42c23, //vdup.32 d2, d19[0]
2065 0xf2001c36, //vfma.f32 d1, d0, d22
2066 0xf3f41c25, //vdup.32 d17, d21[0]
2067 0xf3fc4c25, //vdup.32 d20, d21[1]
2068 0xf2002c31, //vfma.f32 d2, d0, d17
2069 0xf3bc3c23, //vdup.32 d3, d19[1]
2070 0xf2003c34, //vfma.f32 d3, d0, d20
2071 0xf22001b0, //vorr d0, d16, d16
2072 0xe12fff1c, //bx ip
2073};
2074#elif defined(__x86_64__)
2075
2076CODE const uint8_t sk_start_pipeline_hsw[] = {
2077 65,87, //push %r15
2078 65,86, //push %r14
2079 65,85, //push %r13
2080 65,84, //push %r12
2081 83, //push %rbx
2082 73,137,205, //mov %rcx,%r13
2083 73,137,214, //mov %rdx,%r14
2084 72,137,251, //mov %rdi,%rbx
2085 72,173, //lods %ds:(%rsi),%rax
2086 73,137,199, //mov %rax,%r15
2087 73,137,244, //mov %rsi,%r12
2088 72,141,67,8, //lea 0x8(%rbx),%rax
2089 76,57,232, //cmp %r13,%rax
2090 118,5, //jbe 28 <_sk_start_pipeline_hsw+0x28>
2091 72,137,223, //mov %rbx,%rdi
2092 235,65, //jmp 69 <_sk_start_pipeline_hsw+0x69>
2093 185,0,0,0,0, //mov $0x0,%ecx
2094 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2095 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2096 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2097 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2098 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
2099 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
2100 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
2101 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
2102 72,137,223, //mov %rbx,%rdi
2103 76,137,230, //mov %r12,%rsi
2104 76,137,242, //mov %r14,%rdx
2105 65,255,215, //callq *%r15
2106 72,141,123,8, //lea 0x8(%rbx),%rdi
2107 72,131,195,16, //add $0x10,%rbx
2108 76,57,235, //cmp %r13,%rbx
2109 72,137,251, //mov %rdi,%rbx
2110 118,191, //jbe 28 <_sk_start_pipeline_hsw+0x28>
2111 76,137,233, //mov %r13,%rcx
2112 72,41,249, //sub %rdi,%rcx
2113 116,41, //je 9a <_sk_start_pipeline_hsw+0x9a>
2114 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2115 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2116 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2117 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2118 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
2119 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
2120 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
2121 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
2122 76,137,230, //mov %r12,%rsi
2123 76,137,242, //mov %r14,%rdx
2124 65,255,215, //callq *%r15
2125 76,137,232, //mov %r13,%rax
2126 91, //pop %rbx
2127 65,92, //pop %r12
2128 65,93, //pop %r13
2129 65,94, //pop %r14
2130 65,95, //pop %r15
2131 197,248,119, //vzeroupper
2132 195, //retq
2133};
2134
2135CODE const uint8_t sk_just_return_hsw[] = {
2136 195, //retq
2137};
2138
2139CODE const uint8_t sk_seed_shader_hsw[] = {
2140 72,173, //lods %ds:(%rsi),%rax
2141 197,249,110,199, //vmovd %edi,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07002142 196,226,125,88,192, //vpbroadcastd %xmm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05002143 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002144 65,184,0,0,0,63, //mov $0x3f000000,%r8d
2145 196,193,121,110,200, //vmovd %r8d,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07002146 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002147 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05002148 197,252,88,2, //vaddps (%rdx),%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05002149 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
2150 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
2151 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002152 184,0,0,128,63, //mov $0x3f800000,%eax
2153 197,249,110,208, //vmovd %eax,%xmm2
Mike Klein64b97482017-03-14 17:35:04 -07002154 196,226,125,88,210, //vpbroadcastd %xmm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002155 72,173, //lods %ds:(%rsi),%rax
2156 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2157 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
2158 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
2159 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
2160 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
2161 255,224, //jmpq *%rax
2162};
2163
2164CODE const uint8_t sk_constant_color_hsw[] = {
2165 72,173, //lods %ds:(%rsi),%rax
2166 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
2167 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
2168 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
2169 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
2170 72,173, //lods %ds:(%rsi),%rax
2171 255,224, //jmpq *%rax
2172};
2173
2174CODE const uint8_t sk_clear_hsw[] = {
2175 72,173, //lods %ds:(%rsi),%rax
2176 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2177 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2178 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2179 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2180 255,224, //jmpq *%rax
2181};
2182
2183CODE const uint8_t sk_plus__hsw[] = {
2184 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
2185 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
2186 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
2187 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
2188 72,173, //lods %ds:(%rsi),%rax
2189 255,224, //jmpq *%rax
2190};
2191
2192CODE const uint8_t sk_srcover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002193 184,0,0,128,63, //mov $0x3f800000,%eax
2194 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002195 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002196 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
2197 196,194,93,184,192, //vfmadd231ps %ymm8,%ymm4,%ymm0
2198 196,194,85,184,200, //vfmadd231ps %ymm8,%ymm5,%ymm1
2199 196,194,77,184,208, //vfmadd231ps %ymm8,%ymm6,%ymm2
2200 196,194,69,184,216, //vfmadd231ps %ymm8,%ymm7,%ymm3
2201 72,173, //lods %ds:(%rsi),%rax
2202 255,224, //jmpq *%rax
2203};
2204
2205CODE const uint8_t sk_dstover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002206 184,0,0,128,63, //mov $0x3f800000,%eax
2207 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002208 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002209 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
2210 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2211 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
2212 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
2213 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
2214 72,173, //lods %ds:(%rsi),%rax
2215 255,224, //jmpq *%rax
2216};
2217
2218CODE const uint8_t sk_clamp_0_hsw[] = {
2219 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
2220 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
2221 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
2222 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
2223 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
2224 72,173, //lods %ds:(%rsi),%rax
2225 255,224, //jmpq *%rax
2226};
2227
2228CODE const uint8_t sk_clamp_1_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002229 184,0,0,128,63, //mov $0x3f800000,%eax
2230 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002231 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002232 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
2233 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
2234 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
2235 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
2236 72,173, //lods %ds:(%rsi),%rax
2237 255,224, //jmpq *%rax
2238};
2239
2240CODE const uint8_t sk_clamp_a_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002241 184,0,0,128,63, //mov $0x3f800000,%eax
2242 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002243 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002244 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
2245 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
2246 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
2247 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
2248 72,173, //lods %ds:(%rsi),%rax
2249 255,224, //jmpq *%rax
2250};
2251
2252CODE const uint8_t sk_set_rgb_hsw[] = {
2253 72,173, //lods %ds:(%rsi),%rax
2254 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
2255 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
2256 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
2257 72,173, //lods %ds:(%rsi),%rax
2258 255,224, //jmpq *%rax
2259};
2260
2261CODE const uint8_t sk_swap_rb_hsw[] = {
2262 197,124,40,192, //vmovaps %ymm0,%ymm8
2263 72,173, //lods %ds:(%rsi),%rax
2264 197,252,40,194, //vmovaps %ymm2,%ymm0
2265 197,124,41,194, //vmovaps %ymm8,%ymm2
2266 255,224, //jmpq *%rax
2267};
2268
2269CODE const uint8_t sk_swap_hsw[] = {
2270 197,124,40,195, //vmovaps %ymm3,%ymm8
2271 197,124,40,202, //vmovaps %ymm2,%ymm9
2272 197,124,40,209, //vmovaps %ymm1,%ymm10
2273 197,124,40,216, //vmovaps %ymm0,%ymm11
2274 72,173, //lods %ds:(%rsi),%rax
2275 197,252,40,196, //vmovaps %ymm4,%ymm0
2276 197,252,40,205, //vmovaps %ymm5,%ymm1
2277 197,252,40,214, //vmovaps %ymm6,%ymm2
2278 197,252,40,223, //vmovaps %ymm7,%ymm3
2279 197,124,41,220, //vmovaps %ymm11,%ymm4
2280 197,124,41,213, //vmovaps %ymm10,%ymm5
2281 197,124,41,206, //vmovaps %ymm9,%ymm6
2282 197,124,41,199, //vmovaps %ymm8,%ymm7
2283 255,224, //jmpq *%rax
2284};
2285
2286CODE const uint8_t sk_move_src_dst_hsw[] = {
2287 72,173, //lods %ds:(%rsi),%rax
2288 197,252,40,224, //vmovaps %ymm0,%ymm4
2289 197,252,40,233, //vmovaps %ymm1,%ymm5
2290 197,252,40,242, //vmovaps %ymm2,%ymm6
2291 197,252,40,251, //vmovaps %ymm3,%ymm7
2292 255,224, //jmpq *%rax
2293};
2294
2295CODE const uint8_t sk_move_dst_src_hsw[] = {
2296 72,173, //lods %ds:(%rsi),%rax
2297 197,252,40,196, //vmovaps %ymm4,%ymm0
2298 197,252,40,205, //vmovaps %ymm5,%ymm1
2299 197,252,40,214, //vmovaps %ymm6,%ymm2
2300 197,252,40,223, //vmovaps %ymm7,%ymm3
2301 255,224, //jmpq *%rax
2302};
2303
2304CODE const uint8_t sk_premul_hsw[] = {
2305 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
2306 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
2307 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
2308 72,173, //lods %ds:(%rsi),%rax
2309 255,224, //jmpq *%rax
2310};
2311
2312CODE const uint8_t sk_unpremul_hsw[] = {
2313 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
2314 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002315 184,0,0,128,63, //mov $0x3f800000,%eax
2316 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07002317 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05002318 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
2319 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
2320 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
2321 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
2322 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
2323 72,173, //lods %ds:(%rsi),%rax
2324 255,224, //jmpq *%rax
2325};
2326
2327CODE const uint8_t sk_from_srgb_hsw[] = {
Mike Klein5224f462017-03-07 17:29:54 -05002328 184,145,131,158,61, //mov $0x3d9e8391,%eax
2329 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002330 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002331 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
2332 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05002333 184,154,153,153,62, //mov $0x3e99999a,%eax
2334 197,121,110,216, //vmovd %eax,%xmm11
Mike Klein64b97482017-03-14 17:35:04 -07002335 196,66,125,88,219, //vpbroadcastd %xmm11,%ymm11
Mike Klein5224f462017-03-07 17:29:54 -05002336 184,92,143,50,63, //mov $0x3f328f5c,%eax
2337 197,121,110,224, //vmovd %eax,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -07002338 196,66,125,88,228, //vpbroadcastd %xmm12,%ymm12
2339 196,65,125,111,235, //vmovdqa %ymm11,%ymm13
Mike Klein894d5612017-03-07 07:59:52 -05002340 196,66,125,168,236, //vfmadd213ps %ymm12,%ymm0,%ymm13
Mike Klein5224f462017-03-07 17:29:54 -05002341 184,10,215,35,59, //mov $0x3b23d70a,%eax
2342 197,121,110,240, //vmovd %eax,%xmm14
Mike Klein64b97482017-03-14 17:35:04 -07002343 196,66,125,88,246, //vpbroadcastd %xmm14,%ymm14
Mike Klein894d5612017-03-07 07:59:52 -05002344 196,66,45,168,238, //vfmadd213ps %ymm14,%ymm10,%ymm13
Mike Klein5224f462017-03-07 17:29:54 -05002345 184,174,71,97,61, //mov $0x3d6147ae,%eax
2346 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07002347 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05002348 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
2349 196,195,21,74,193,0, //vblendvps %ymm0,%ymm9,%ymm13,%ymm0
2350 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
2351 197,116,89,233, //vmulps %ymm1,%ymm1,%ymm13
Mike Klein64b97482017-03-14 17:35:04 -07002352 196,65,125,111,251, //vmovdqa %ymm11,%ymm15
Mike Klein894d5612017-03-07 07:59:52 -05002353 196,66,117,168,252, //vfmadd213ps %ymm12,%ymm1,%ymm15
2354 196,66,21,168,254, //vfmadd213ps %ymm14,%ymm13,%ymm15
2355 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
2356 196,195,5,74,201,16, //vblendvps %ymm1,%ymm9,%ymm15,%ymm1
2357 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
2358 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
2359 196,66,109,168,220, //vfmadd213ps %ymm12,%ymm2,%ymm11
2360 196,66,53,168,222, //vfmadd213ps %ymm14,%ymm9,%ymm11
2361 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
2362 196,195,37,74,208,32, //vblendvps %ymm2,%ymm8,%ymm11,%ymm2
2363 72,173, //lods %ds:(%rsi),%rax
2364 255,224, //jmpq *%rax
2365};
2366
2367CODE const uint8_t sk_to_srgb_hsw[] = {
2368 197,124,82,192, //vrsqrtps %ymm0,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05002369 196,65,124,83,216, //vrcpps %ymm8,%ymm11
2370 196,65,124,82,224, //vrsqrtps %ymm8,%ymm12
2371 184,41,92,71,65, //mov $0x41475c29,%eax
2372 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002373 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05002374 197,60,89,232, //vmulps %ymm0,%ymm8,%ymm13
2375 184,0,0,128,63, //mov $0x3f800000,%eax
2376 197,121,110,200, //vmovd %eax,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07002377 196,66,125,88,201, //vpbroadcastd %xmm9,%ymm9
Mike Klein5224f462017-03-07 17:29:54 -05002378 184,194,135,210,62, //mov $0x3ed287c2,%eax
2379 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07002380 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05002381 184,206,111,48,63, //mov $0x3f306fce,%eax
2382 197,121,110,240, //vmovd %eax,%xmm14
Mike Klein64b97482017-03-14 17:35:04 -07002383 196,66,125,88,246, //vpbroadcastd %xmm14,%ymm14
Mike Klein5224f462017-03-07 17:29:54 -05002384 184,168,87,202,61, //mov $0x3dca57a8,%eax
2385 53,0,0,0,128, //xor $0x80000000,%eax
2386 197,121,110,248, //vmovd %eax,%xmm15
Mike Klein64b97482017-03-14 17:35:04 -07002387 196,66,125,88,255, //vpbroadcastd %xmm15,%ymm15
Mike Klein894d5612017-03-07 07:59:52 -05002388 196,66,13,168,223, //vfmadd213ps %ymm15,%ymm14,%ymm11
Mike Klein5224f462017-03-07 17:29:54 -05002389 196,66,45,184,220, //vfmadd231ps %ymm12,%ymm10,%ymm11
2390 196,65,52,93,219, //vminps %ymm11,%ymm9,%ymm11
2391 184,4,231,140,59, //mov $0x3b8ce704,%eax
2392 197,121,110,224, //vmovd %eax,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -07002393 196,66,125,88,228, //vpbroadcastd %xmm12,%ymm12
Mike Klein5224f462017-03-07 17:29:54 -05002394 196,193,124,194,196,1, //vcmpltps %ymm12,%ymm0,%ymm0
2395 196,195,37,74,197,0, //vblendvps %ymm0,%ymm13,%ymm11,%ymm0
2396 197,124,82,217, //vrsqrtps %ymm1,%ymm11
2397 196,65,124,83,235, //vrcpps %ymm11,%ymm13
2398 196,65,124,82,219, //vrsqrtps %ymm11,%ymm11
2399 196,66,13,168,239, //vfmadd213ps %ymm15,%ymm14,%ymm13
2400 196,66,45,184,235, //vfmadd231ps %ymm11,%ymm10,%ymm13
2401 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
2402 196,65,52,93,237, //vminps %ymm13,%ymm9,%ymm13
2403 196,193,116,194,204,1, //vcmpltps %ymm12,%ymm1,%ymm1
2404 196,195,21,74,203,16, //vblendvps %ymm1,%ymm11,%ymm13,%ymm1
2405 197,124,82,218, //vrsqrtps %ymm2,%ymm11
2406 196,65,124,83,235, //vrcpps %ymm11,%ymm13
2407 196,66,13,168,239, //vfmadd213ps %ymm15,%ymm14,%ymm13
2408 196,65,124,82,219, //vrsqrtps %ymm11,%ymm11
2409 196,66,45,184,235, //vfmadd231ps %ymm11,%ymm10,%ymm13
2410 196,65,52,93,205, //vminps %ymm13,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05002411 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05002412 196,193,108,194,212,1, //vcmpltps %ymm12,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002413 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
2414 72,173, //lods %ds:(%rsi),%rax
2415 255,224, //jmpq *%rax
2416};
2417
2418CODE const uint8_t sk_scale_1_float_hsw[] = {
2419 72,173, //lods %ds:(%rsi),%rax
2420 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
2421 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
2422 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
2423 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
2424 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
2425 72,173, //lods %ds:(%rsi),%rax
2426 255,224, //jmpq *%rax
2427};
2428
2429CODE const uint8_t sk_scale_u8_hsw[] = {
2430 73,137,200, //mov %rcx,%r8
2431 72,173, //lods %ds:(%rsi),%rax
2432 72,139,0, //mov (%rax),%rax
2433 72,1,248, //add %rdi,%rax
2434 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05002435 117,56, //jne 4bf <_sk_scale_u8_hsw+0x48>
Mike Klein64b97482017-03-14 17:35:04 -07002436 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05002437 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
2438 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002439 184,129,128,128,59, //mov $0x3b808081,%eax
2440 197,121,110,200, //vmovd %eax,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07002441 196,66,125,88,201, //vpbroadcastd %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05002442 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
2443 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
2444 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
2445 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
2446 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
2447 72,173, //lods %ds:(%rsi),%rax
2448 76,137,193, //mov %r8,%rcx
2449 255,224, //jmpq *%rax
2450 49,201, //xor %ecx,%ecx
2451 77,137,194, //mov %r8,%r10
2452 69,49,201, //xor %r9d,%r9d
2453 68,15,182,24, //movzbl (%rax),%r11d
2454 72,255,192, //inc %rax
2455 73,211,227, //shl %cl,%r11
2456 77,9,217, //or %r11,%r9
2457 72,131,193,8, //add $0x8,%rcx
2458 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05002459 117,234, //jne 4c7 <_sk_scale_u8_hsw+0x50>
Mike Klein894d5612017-03-07 07:59:52 -05002460 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05002461 235,167, //jmp 48b <_sk_scale_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05002462};
2463
2464CODE const uint8_t sk_lerp_1_float_hsw[] = {
2465 72,173, //lods %ds:(%rsi),%rax
2466 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
2467 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
2468 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2469 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
2470 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
2471 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
2472 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
2473 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
2474 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
2475 72,173, //lods %ds:(%rsi),%rax
2476 255,224, //jmpq *%rax
2477};
2478
2479CODE const uint8_t sk_lerp_u8_hsw[] = {
2480 73,137,200, //mov %rcx,%r8
2481 72,173, //lods %ds:(%rsi),%rax
2482 72,139,0, //mov (%rax),%rax
2483 72,1,248, //add %rdi,%rax
2484 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05002485 117,76, //jne 56f <_sk_lerp_u8_hsw+0x5c>
Mike Klein64b97482017-03-14 17:35:04 -07002486 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05002487 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
2488 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002489 184,129,128,128,59, //mov $0x3b808081,%eax
2490 197,121,110,200, //vmovd %eax,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07002491 196,66,125,88,201, //vpbroadcastd %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05002492 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
2493 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
2494 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2495 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
2496 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
2497 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
2498 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
2499 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
2500 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
2501 72,173, //lods %ds:(%rsi),%rax
2502 76,137,193, //mov %r8,%rcx
2503 255,224, //jmpq *%rax
2504 49,201, //xor %ecx,%ecx
2505 77,137,194, //mov %r8,%r10
2506 69,49,201, //xor %r9d,%r9d
2507 68,15,182,24, //movzbl (%rax),%r11d
2508 72,255,192, //inc %rax
2509 73,211,227, //shl %cl,%r11
2510 77,9,217, //or %r11,%r9
2511 72,131,193,8, //add $0x8,%rcx
2512 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05002513 117,234, //jne 577 <_sk_lerp_u8_hsw+0x64>
Mike Klein894d5612017-03-07 07:59:52 -05002514 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05002515 235,147, //jmp 527 <_sk_lerp_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05002516};
2517
2518CODE const uint8_t sk_lerp_565_hsw[] = {
2519 72,173, //lods %ds:(%rsi),%rax
2520 76,139,16, //mov (%rax),%r10
2521 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05002522 15,133,179,0,0,0, //jne 655 <_sk_lerp_565_hsw+0xc1>
Mike Klein894d5612017-03-07 07:59:52 -05002523 196,193,122,111,28,122, //vmovdqu (%r10,%rdi,2),%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05002524 196,98,125,51,195, //vpmovzxwd %xmm3,%ymm8
2525 184,0,248,0,0, //mov $0xf800,%eax
2526 197,249,110,216, //vmovd %eax,%xmm3
2527 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
2528 196,193,101,219,216, //vpand %ymm8,%ymm3,%ymm3
2529 197,124,91,203, //vcvtdq2ps %ymm3,%ymm9
2530 184,8,33,132,55, //mov $0x37842108,%eax
2531 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002532 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002533 197,52,89,203, //vmulps %ymm3,%ymm9,%ymm9
2534 184,224,7,0,0, //mov $0x7e0,%eax
2535 197,249,110,216, //vmovd %eax,%xmm3
2536 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
2537 196,193,101,219,216, //vpand %ymm8,%ymm3,%ymm3
2538 197,124,91,211, //vcvtdq2ps %ymm3,%ymm10
2539 184,33,8,2,58, //mov $0x3a020821,%eax
2540 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002541 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002542 197,44,89,211, //vmulps %ymm3,%ymm10,%ymm10
2543 184,31,0,0,0, //mov $0x1f,%eax
2544 197,249,110,216, //vmovd %eax,%xmm3
2545 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
2546 196,193,101,219,216, //vpand %ymm8,%ymm3,%ymm3
2547 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
2548 184,8,33,4,61, //mov $0x3d042108,%eax
2549 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002550 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002551 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05002552 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05002553 196,226,53,168,196, //vfmadd213ps %ymm4,%ymm9,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05002554 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05002555 196,226,45,168,205, //vfmadd213ps %ymm5,%ymm10,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002556 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
2557 196,226,101,168,214, //vfmadd213ps %ymm6,%ymm3,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002558 184,0,0,128,63, //mov $0x3f800000,%eax
2559 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002560 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05002561 72,173, //lods %ds:(%rsi),%rax
2562 255,224, //jmpq *%rax
2563 65,137,200, //mov %ecx,%r8d
2564 65,128,224,7, //and $0x7,%r8b
2565 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
2566 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05002567 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07002568 15,135,59,255,255,255, //ja 5a8 <_sk_lerp_565_hsw+0x14>
2569 69,15,182,192, //movzbl %r8b,%r8d
Mike Klein5224f462017-03-07 17:29:54 -05002570 76,141,13,76,0,0,0, //lea 0x4c(%rip),%r9 # 6c4 <_sk_lerp_565_hsw+0x130>
Mike Klein894d5612017-03-07 07:59:52 -05002571 75,99,4,129, //movslq (%r9,%r8,4),%rax
2572 76,1,200, //add %r9,%rax
2573 255,224, //jmpq *%rax
2574 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
2575 196,193,97,196,92,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm3
2576 196,193,97,196,92,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm3,%xmm3
2577 196,193,97,196,92,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm3,%xmm3
2578 196,193,97,196,92,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm3,%xmm3
2579 196,193,97,196,92,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
2580 196,193,97,196,92,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
2581 196,193,97,196,28,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05002582 233,231,254,255,255, //jmpq 5a8 <_sk_lerp_565_hsw+0x14>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002583 15,31,0, //nopl (%rax)
2584 241, //icebp
Mike Klein894d5612017-03-07 07:59:52 -05002585 255, //(bad)
2586 255, //(bad)
2587 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07002588 233,255,255,255,225, //jmpq ffffffffe20006cc <_sk_linear_gradient_2stops_hsw+0xffffffffe1fff4f0>
Mike Klein894d5612017-03-07 07:59:52 -05002589 255, //(bad)
2590 255, //(bad)
2591 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002592 217,255, //fcos
Mike Klein894d5612017-03-07 07:59:52 -05002593 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002594 255,209, //callq *%rcx
Mike Klein894d5612017-03-07 07:59:52 -05002595 255, //(bad)
2596 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002597 255,201, //dec %ecx
Mike Klein894d5612017-03-07 07:59:52 -05002598 255, //(bad)
2599 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002600 255, //(bad)
2601 189, //.byte 0xbd
Mike Klein894d5612017-03-07 07:59:52 -05002602 255, //(bad)
2603 255, //(bad)
2604 255, //.byte 0xff
2605};
2606
2607CODE const uint8_t sk_load_tables_hsw[] = {
2608 73,137,200, //mov %rcx,%r8
2609 72,173, //lods %ds:(%rsi),%rax
2610 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
2611 76,3,8, //add (%rax),%r9
2612 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05002613 117,121, //jne 76e <_sk_load_tables_hsw+0x8e>
Mike Klein894d5612017-03-07 07:59:52 -05002614 196,193,126,111,25, //vmovdqu (%r9),%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002615 185,255,0,0,0, //mov $0xff,%ecx
2616 197,249,110,193, //vmovd %ecx,%xmm0
2617 196,226,125,88,208, //vpbroadcastd %xmm0,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002618 197,237,219,203, //vpand %ymm3,%ymm2,%ymm1
2619 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
2620 72,139,72,8, //mov 0x8(%rax),%rcx
2621 76,139,72,16, //mov 0x10(%rax),%r9
2622 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
2623 196,226,53,146,4,137, //vgatherdps %ymm9,(%rcx,%ymm1,4),%ymm0
2624 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
2625 197,109,219,201, //vpand %ymm1,%ymm2,%ymm9
2626 196,65,45,118,210, //vpcmpeqd %ymm10,%ymm10,%ymm10
2627 196,130,45,146,12,137, //vgatherdps %ymm10,(%r9,%ymm9,4),%ymm1
2628 72,139,64,24, //mov 0x18(%rax),%rax
2629 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
2630 196,65,109,219,201, //vpand %ymm9,%ymm2,%ymm9
2631 196,162,61,146,20,136, //vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
2632 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002633 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
2634 184,129,128,128,59, //mov $0x3b808081,%eax
2635 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002636 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002637 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05002638 72,173, //lods %ds:(%rsi),%rax
2639 76,137,193, //mov %r8,%rcx
2640 255,224, //jmpq *%rax
2641 185,8,0,0,0, //mov $0x8,%ecx
2642 68,41,193, //sub %r8d,%ecx
2643 192,225,3, //shl $0x3,%cl
2644 73,199,194,255,255,255,255, //mov $0xffffffffffffffff,%r10
2645 73,211,234, //shr %cl,%r10
2646 196,193,249,110,194, //vmovq %r10,%xmm0
2647 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
2648 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002649 233,99,255,255,255, //jmpq 6fa <_sk_load_tables_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05002650};
2651
2652CODE const uint8_t sk_load_a8_hsw[] = {
2653 73,137,200, //mov %rcx,%r8
2654 72,173, //lods %ds:(%rsi),%rax
2655 72,139,0, //mov (%rax),%rax
2656 72,1,248, //add %rdi,%rax
2657 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05002658 117,50, //jne 7d9 <_sk_load_a8_hsw+0x42>
Mike Klein64b97482017-03-14 17:35:04 -07002659 197,250,126,0, //vmovq (%rax),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05002660 196,226,125,49,192, //vpmovzxbd %xmm0,%ymm0
2661 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05002662 184,129,128,128,59, //mov $0x3b808081,%eax
2663 197,249,110,200, //vmovd %eax,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07002664 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002665 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
2666 72,173, //lods %ds:(%rsi),%rax
2667 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2668 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2669 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2670 76,137,193, //mov %r8,%rcx
2671 255,224, //jmpq *%rax
2672 49,201, //xor %ecx,%ecx
2673 77,137,194, //mov %r8,%r10
2674 69,49,201, //xor %r9d,%r9d
2675 68,15,182,24, //movzbl (%rax),%r11d
2676 72,255,192, //inc %rax
2677 73,211,227, //shl %cl,%r11
2678 77,9,217, //or %r11,%r9
2679 72,131,193,8, //add $0x8,%rcx
2680 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05002681 117,234, //jne 7e1 <_sk_load_a8_hsw+0x4a>
Mike Klein894d5612017-03-07 07:59:52 -05002682 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05002683 235,173, //jmp 7ab <_sk_load_a8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05002684};
2685
2686CODE const uint8_t sk_store_a8_hsw[] = {
2687 72,173, //lods %ds:(%rsi),%rax
2688 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05002689 184,0,0,127,67, //mov $0x437f0000,%eax
2690 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002691 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002692 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
2693 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
2694 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
2695 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
2696 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
2697 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05002698 117,10, //jne 839 <_sk_store_a8_hsw+0x3b>
Mike Klein894d5612017-03-07 07:59:52 -05002699 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
2700 72,173, //lods %ds:(%rsi),%rax
2701 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07002702 65,137,200, //mov %ecx,%r8d
2703 65,128,224,7, //and $0x7,%r8b
2704 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05002705 65,128,248,6, //cmp $0x6,%r8b
Mike Klein5224f462017-03-07 17:29:54 -05002706 119,236, //ja 835 <_sk_store_a8_hsw+0x37>
Mike Klein894d5612017-03-07 07:59:52 -05002707 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002708 65,15,182,192, //movzbl %r8b,%eax
2709 76,141,5,67,0,0,0, //lea 0x43(%rip),%r8 # 89c <_sk_store_a8_hsw+0x9e>
2710 73,99,4,128, //movslq (%r8,%rax,4),%rax
2711 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05002712 255,224, //jmpq *%rax
2713 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
2714 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
2715 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
2716 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
2717 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
2718 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
2719 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Klein64b97482017-03-14 17:35:04 -07002720 235,154, //jmp 835 <_sk_store_a8_hsw+0x37>
Mike Klein5224f462017-03-07 17:29:54 -05002721 144, //nop
2722 246,255, //idiv %bh
Mike Klein894d5612017-03-07 07:59:52 -05002723 255, //(bad)
2724 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002725 238, //out %al,(%dx)
Mike Klein894d5612017-03-07 07:59:52 -05002726 255, //(bad)
2727 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002728 255,230, //jmpq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05002729 255, //(bad)
2730 255, //(bad)
2731 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002732 222,255, //fdivrp %st,%st(7)
Mike Klein894d5612017-03-07 07:59:52 -05002733 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002734 255,214, //callq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05002735 255, //(bad)
2736 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002737 255,206, //dec %esi
Mike Klein894d5612017-03-07 07:59:52 -05002738 255, //(bad)
2739 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002740 255,198, //inc %esi
Mike Klein894d5612017-03-07 07:59:52 -05002741 255, //(bad)
2742 255, //(bad)
2743 255, //.byte 0xff
2744};
2745
2746CODE const uint8_t sk_load_565_hsw[] = {
2747 72,173, //lods %ds:(%rsi),%rax
2748 76,139,16, //mov (%rax),%r10
2749 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07002750 15,133,149,0,0,0, //jne 95b <_sk_load_565_hsw+0xa3>
Mike Klein894d5612017-03-07 07:59:52 -05002751 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
2752 196,226,125,51,208, //vpmovzxwd %xmm0,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05002753 184,0,248,0,0, //mov $0xf800,%eax
2754 197,249,110,192, //vmovd %eax,%xmm0
2755 196,226,125,88,192, //vpbroadcastd %xmm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05002756 197,253,219,194, //vpand %ymm2,%ymm0,%ymm0
2757 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05002758 184,8,33,132,55, //mov $0x37842108,%eax
2759 197,249,110,200, //vmovd %eax,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07002760 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05002761 197,252,89,193, //vmulps %ymm1,%ymm0,%ymm0
2762 184,224,7,0,0, //mov $0x7e0,%eax
2763 197,249,110,200, //vmovd %eax,%xmm1
2764 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002765 197,245,219,202, //vpand %ymm2,%ymm1,%ymm1
2766 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05002767 184,33,8,2,58, //mov $0x3a020821,%eax
2768 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002769 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002770 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
2771 184,31,0,0,0, //mov $0x1f,%eax
2772 197,249,110,216, //vmovd %eax,%xmm3
2773 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05002774 197,229,219,210, //vpand %ymm2,%ymm3,%ymm2
2775 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05002776 184,8,33,4,61, //mov $0x3d042108,%eax
2777 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002778 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05002779 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
2780 184,0,0,128,63, //mov $0x3f800000,%eax
2781 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002782 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05002783 72,173, //lods %ds:(%rsi),%rax
2784 255,224, //jmpq *%rax
2785 65,137,200, //mov %ecx,%r8d
2786 65,128,224,7, //and $0x7,%r8b
2787 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
2788 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05002789 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07002790 15,135,89,255,255,255, //ja 8cc <_sk_load_565_hsw+0x14>
2791 69,15,182,192, //movzbl %r8b,%r8d
2792 76,141,13,74,0,0,0, //lea 0x4a(%rip),%r9 # 9c8 <_sk_load_565_hsw+0x110>
Mike Klein894d5612017-03-07 07:59:52 -05002793 75,99,4,129, //movslq (%r9,%r8,4),%rax
2794 76,1,200, //add %r9,%rax
2795 255,224, //jmpq *%rax
2796 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
2797 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
2798 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
2799 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
2800 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
2801 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
2802 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
2803 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07002804 233,5,255,255,255, //jmpq 8cc <_sk_load_565_hsw+0x14>
Mike Klein5224f462017-03-07 17:29:54 -05002805 144, //nop
2806 243,255, //repz (bad)
Mike Klein894d5612017-03-07 07:59:52 -05002807 255, //(bad)
2808 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07002809 235,255, //jmp 9cd <_sk_load_565_hsw+0x115>
Mike Klein894d5612017-03-07 07:59:52 -05002810 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002811 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05002812 255, //(bad)
2813 255, //(bad)
2814 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002815 219,255, //(bad)
Mike Klein894d5612017-03-07 07:59:52 -05002816 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002817 255,211, //callq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05002818 255, //(bad)
2819 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002820 255,203, //dec %ebx
Mike Klein894d5612017-03-07 07:59:52 -05002821 255, //(bad)
2822 255, //(bad)
2823 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002824 191, //.byte 0xbf
Mike Klein894d5612017-03-07 07:59:52 -05002825 255, //(bad)
2826 255, //(bad)
2827 255, //.byte 0xff
2828};
2829
2830CODE const uint8_t sk_store_565_hsw[] = {
2831 72,173, //lods %ds:(%rsi),%rax
2832 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05002833 184,0,0,248,65, //mov $0x41f80000,%eax
2834 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002835 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002836 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
2837 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
2838 196,193,53,114,241,11, //vpslld $0xb,%ymm9,%ymm9
Mike Klein5224f462017-03-07 17:29:54 -05002839 184,0,0,124,66, //mov $0x427c0000,%eax
2840 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07002841 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05002842 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
2843 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
2844 196,193,45,114,242,5, //vpslld $0x5,%ymm10,%ymm10
2845 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
2846 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
2847 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
2848 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
2849 196,67,125,57,193,1, //vextracti128 $0x1,%ymm8,%xmm9
2850 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
2851 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07002852 117,10, //jne a50 <_sk_store_565_hsw+0x6c>
Mike Klein894d5612017-03-07 07:59:52 -05002853 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
2854 72,173, //lods %ds:(%rsi),%rax
2855 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07002856 65,137,200, //mov %ecx,%r8d
2857 65,128,224,7, //and $0x7,%r8b
2858 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05002859 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07002860 119,236, //ja a4c <_sk_store_565_hsw+0x68>
2861 65,15,182,192, //movzbl %r8b,%eax
2862 76,141,5,69,0,0,0, //lea 0x45(%rip),%r8 # ab0 <_sk_store_565_hsw+0xcc>
2863 73,99,4,128, //movslq (%r8,%rax,4),%rax
2864 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05002865 255,224, //jmpq *%rax
2866 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
2867 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
2868 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
2869 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
2870 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
2871 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
Mike Klein64b97482017-03-14 17:35:04 -07002872 196,67,121,21,4,121,0, //vpextrw $0x0,%xmm8,(%r9,%rdi,2)
2873 235,159, //jmp a4c <_sk_store_565_hsw+0x68>
2874 15,31,0, //nopl (%rax)
Mike Klein5224f462017-03-07 17:29:54 -05002875 244, //hlt
Mike Klein894d5612017-03-07 07:59:52 -05002876 255, //(bad)
2877 255, //(bad)
2878 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002879 236, //in (%dx),%al
Mike Klein894d5612017-03-07 07:59:52 -05002880 255, //(bad)
2881 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002882 255,228, //jmpq *%rsp
Mike Klein894d5612017-03-07 07:59:52 -05002883 255, //(bad)
2884 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05002885 255, //(bad)
2886 220,255, //fdivr %st,%st(7)
2887 255, //(bad)
2888 255,212, //callq *%rsp
2889 255, //(bad)
2890 255, //(bad)
2891 255,204, //dec %esp
2892 255, //(bad)
2893 255, //(bad)
2894 255,196, //inc %esp
Mike Klein894d5612017-03-07 07:59:52 -05002895 255, //(bad)
2896 255, //(bad)
2897 255, //.byte 0xff
2898};
2899
2900CODE const uint8_t sk_load_8888_hsw[] = {
2901 73,137,200, //mov %rcx,%r8
2902 72,173, //lods %ds:(%rsi),%rax
2903 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
2904 76,3,8, //add (%rax),%r9
2905 77,133,192, //test %r8,%r8
Mike Klein64b97482017-03-14 17:35:04 -07002906 117,104, //jne b49 <_sk_load_8888_hsw+0x7d>
Mike Klein894d5612017-03-07 07:59:52 -05002907 196,193,126,111,25, //vmovdqu (%r9),%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002908 184,255,0,0,0, //mov $0xff,%eax
2909 197,249,110,192, //vmovd %eax,%xmm0
2910 196,226,125,88,208, //vpbroadcastd %xmm0,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002911 197,237,219,195, //vpand %ymm3,%ymm2,%ymm0
2912 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002913 184,129,128,128,59, //mov $0x3b808081,%eax
2914 197,249,110,200, //vmovd %eax,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07002915 196,98,125,88,193, //vpbroadcastd %xmm1,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002916 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05002917 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
2918 197,237,219,201, //vpand %ymm1,%ymm2,%ymm1
2919 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002920 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002921 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
2922 196,193,109,219,209, //vpand %ymm9,%ymm2,%ymm2
2923 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002924 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002925 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
2926 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
2927 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
2928 72,173, //lods %ds:(%rsi),%rax
2929 76,137,193, //mov %r8,%rcx
2930 255,224, //jmpq *%rax
2931 185,8,0,0,0, //mov $0x8,%ecx
2932 68,41,193, //sub %r8d,%ecx
2933 192,225,3, //shl $0x3,%cl
2934 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
2935 72,211,232, //shr %cl,%rax
2936 196,225,249,110,192, //vmovq %rax,%xmm0
2937 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
2938 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Klein64b97482017-03-14 17:35:04 -07002939 233,116,255,255,255, //jmpq ae6 <_sk_load_8888_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05002940};
2941
2942CODE const uint8_t sk_store_8888_hsw[] = {
2943 73,137,200, //mov %rcx,%r8
2944 72,173, //lods %ds:(%rsi),%rax
2945 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
2946 76,3,8, //add (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002947 184,0,0,127,67, //mov $0x437f0000,%eax
2948 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07002949 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002950 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
2951 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
2952 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
2953 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
2954 196,193,45,114,242,8, //vpslld $0x8,%ymm10,%ymm10
2955 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
2956 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
2957 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
2958 196,193,45,114,242,16, //vpslld $0x10,%ymm10,%ymm10
2959 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
2960 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
2961 196,193,61,114,240,24, //vpslld $0x18,%ymm8,%ymm8
2962 196,65,45,235,192, //vpor %ymm8,%ymm10,%ymm8
2963 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
2964 77,133,192, //test %r8,%r8
Mike Klein64b97482017-03-14 17:35:04 -07002965 117,12, //jne be6 <_sk_store_8888_hsw+0x74>
Mike Klein894d5612017-03-07 07:59:52 -05002966 196,65,126,127,1, //vmovdqu %ymm8,(%r9)
2967 72,173, //lods %ds:(%rsi),%rax
2968 76,137,193, //mov %r8,%rcx
2969 255,224, //jmpq *%rax
2970 185,8,0,0,0, //mov $0x8,%ecx
2971 68,41,193, //sub %r8d,%ecx
2972 192,225,3, //shl $0x3,%cl
2973 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
2974 72,211,232, //shr %cl,%rax
2975 196,97,249,110,200, //vmovq %rax,%xmm9
2976 196,66,125,33,201, //vpmovsxbd %xmm9,%ymm9
2977 196,66,53,142,1, //vpmaskmovd %ymm8,%ymm9,(%r9)
Mike Klein64b97482017-03-14 17:35:04 -07002978 235,211, //jmp bdf <_sk_store_8888_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05002979};
2980
2981CODE const uint8_t sk_load_f16_hsw[] = {
2982 72,173, //lods %ds:(%rsi),%rax
2983 72,139,0, //mov (%rax),%rax
2984 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07002985 117,97, //jne c77 <_sk_load_f16_hsw+0x6b>
2986 197,121,16,4,248, //vmovupd (%rax,%rdi,8),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05002987 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
2988 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07002989 197,122,111,76,248,48, //vmovdqu 0x30(%rax,%rdi,8),%xmm9
2990 197,185,97,194, //vpunpcklwd %xmm2,%xmm8,%xmm0
2991 197,185,105,210, //vpunpckhwd %xmm2,%xmm8,%xmm2
2992 196,193,97,97,201, //vpunpcklwd %xmm9,%xmm3,%xmm1
2993 196,193,97,105,217, //vpunpckhwd %xmm9,%xmm3,%xmm3
2994 197,121,97,194, //vpunpcklwd %xmm2,%xmm0,%xmm8
2995 197,121,105,202, //vpunpckhwd %xmm2,%xmm0,%xmm9
2996 197,241,97,211, //vpunpcklwd %xmm3,%xmm1,%xmm2
2997 197,241,105,219, //vpunpckhwd %xmm3,%xmm1,%xmm3
2998 197,185,108,194, //vpunpcklqdq %xmm2,%xmm8,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05002999 196,226,125,19,192, //vcvtph2ps %xmm0,%ymm0
Mike Klein64b97482017-03-14 17:35:04 -07003000 197,185,109,202, //vpunpckhqdq %xmm2,%xmm8,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05003001 196,226,125,19,201, //vcvtph2ps %xmm1,%ymm1
3002 197,177,108,211, //vpunpcklqdq %xmm3,%xmm9,%xmm2
3003 196,226,125,19,210, //vcvtph2ps %xmm2,%ymm2
3004 197,177,109,219, //vpunpckhqdq %xmm3,%xmm9,%xmm3
3005 196,226,125,19,219, //vcvtph2ps %xmm3,%ymm3
3006 72,173, //lods %ds:(%rsi),%rax
3007 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07003008 197,123,16,4,248, //vmovsd (%rax,%rdi,8),%xmm8
3009 196,65,49,239,201, //vpxor %xmm9,%xmm9,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05003010 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003011 116,79, //je cd6 <_sk_load_f16_hsw+0xca>
3012 197,57,22,68,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05003013 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003014 114,67, //jb cd6 <_sk_load_f16_hsw+0xca>
Mike Klein894d5612017-03-07 07:59:52 -05003015 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
3016 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003017 116,68, //je ce3 <_sk_load_f16_hsw+0xd7>
Mike Klein894d5612017-03-07 07:59:52 -05003018 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
3019 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003020 114,56, //jb ce3 <_sk_load_f16_hsw+0xd7>
Mike Klein894d5612017-03-07 07:59:52 -05003021 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
3022 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003023 15,132,114,255,255,255, //je c2d <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05003024 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
3025 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003026 15,130,98,255,255,255, //jb c2d <_sk_load_f16_hsw+0x21>
3027 197,122,126,76,248,48, //vmovq 0x30(%rax,%rdi,8),%xmm9
3028 233,87,255,255,255, //jmpq c2d <_sk_load_f16_hsw+0x21>
3029 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
3030 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
3031 233,74,255,255,255, //jmpq c2d <_sk_load_f16_hsw+0x21>
3032 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
3033 233,65,255,255,255, //jmpq c2d <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05003034};
3035
3036CODE const uint8_t sk_store_f16_hsw[] = {
3037 72,173, //lods %ds:(%rsi),%rax
3038 72,139,0, //mov (%rax),%rax
3039 196,195,125,29,192,4, //vcvtps2ph $0x4,%ymm0,%xmm8
3040 196,195,125,29,201,4, //vcvtps2ph $0x4,%ymm1,%xmm9
3041 196,195,125,29,210,4, //vcvtps2ph $0x4,%ymm2,%xmm10
3042 196,195,125,29,219,4, //vcvtps2ph $0x4,%ymm3,%xmm11
3043 196,65,57,97,225, //vpunpcklwd %xmm9,%xmm8,%xmm12
3044 196,65,57,105,193, //vpunpckhwd %xmm9,%xmm8,%xmm8
3045 196,65,41,97,203, //vpunpcklwd %xmm11,%xmm10,%xmm9
3046 196,65,41,105,235, //vpunpckhwd %xmm11,%xmm10,%xmm13
3047 196,65,25,98,217, //vpunpckldq %xmm9,%xmm12,%xmm11
3048 196,65,25,106,209, //vpunpckhdq %xmm9,%xmm12,%xmm10
3049 196,65,57,98,205, //vpunpckldq %xmm13,%xmm8,%xmm9
3050 196,65,57,106,197, //vpunpckhdq %xmm13,%xmm8,%xmm8
3051 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003052 117,27, //jne d51 <_sk_store_f16_hsw+0x65>
Mike Klein894d5612017-03-07 07:59:52 -05003053 197,120,17,28,248, //vmovups %xmm11,(%rax,%rdi,8)
3054 197,120,17,84,248,16, //vmovups %xmm10,0x10(%rax,%rdi,8)
3055 197,120,17,76,248,32, //vmovups %xmm9,0x20(%rax,%rdi,8)
3056 197,122,127,68,248,48, //vmovdqu %xmm8,0x30(%rax,%rdi,8)
3057 72,173, //lods %ds:(%rsi),%rax
3058 255,224, //jmpq *%rax
3059 197,121,214,28,248, //vmovq %xmm11,(%rax,%rdi,8)
3060 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003061 116,241, //je d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003062 197,121,23,92,248,8, //vmovhpd %xmm11,0x8(%rax,%rdi,8)
3063 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003064 114,229, //jb d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003065 197,121,214,84,248,16, //vmovq %xmm10,0x10(%rax,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07003066 116,221, //je d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003067 197,121,23,84,248,24, //vmovhpd %xmm10,0x18(%rax,%rdi,8)
3068 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003069 114,209, //jb d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003070 197,121,214,76,248,32, //vmovq %xmm9,0x20(%rax,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07003071 116,201, //je d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003072 197,121,23,76,248,40, //vmovhpd %xmm9,0x28(%rax,%rdi,8)
3073 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003074 114,189, //jb d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003075 197,121,214,68,248,48, //vmovq %xmm8,0x30(%rax,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07003076 235,181, //jmp d4d <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05003077};
3078
3079CODE const uint8_t sk_store_f32_hsw[] = {
3080 72,173, //lods %ds:(%rsi),%rax
3081 76,139,0, //mov (%rax),%r8
3082 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
3083 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
3084 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
3085 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
3086 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
3087 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
3088 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
3089 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
3090 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
3091 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003092 117,55, //jne e05 <_sk_store_f32_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05003093 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
3094 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
3095 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
3096 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
3097 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
3098 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
3099 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
3100 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
3101 72,173, //lods %ds:(%rsi),%rax
3102 255,224, //jmpq *%rax
3103 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
3104 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003105 116,240, //je e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003106 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
3107 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003108 114,227, //jb e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003109 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07003110 116,218, //je e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003111 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
3112 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003113 114,205, //jb e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003114 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07003115 116,195, //je e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003116 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
3117 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07003118 114,181, //jb e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003119 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07003120 235,171, //jmp e01 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003121};
3122
3123CODE const uint8_t sk_clamp_x_hsw[] = {
3124 72,173, //lods %ds:(%rsi),%rax
3125 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3126 197,188,95,192, //vmaxps %ymm0,%ymm8,%ymm0
3127 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
3128 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
3129 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
3130 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
3131 72,173, //lods %ds:(%rsi),%rax
3132 255,224, //jmpq *%rax
3133};
3134
3135CODE const uint8_t sk_clamp_y_hsw[] = {
3136 72,173, //lods %ds:(%rsi),%rax
3137 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3138 197,188,95,201, //vmaxps %ymm1,%ymm8,%ymm1
3139 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
3140 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
3141 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
3142 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
3143 72,173, //lods %ds:(%rsi),%rax
3144 255,224, //jmpq *%rax
3145};
3146
3147CODE const uint8_t sk_repeat_x_hsw[] = {
3148 72,173, //lods %ds:(%rsi),%rax
3149 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3150 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
3151 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
3152 196,98,61,172,200, //vfnmadd213ps %ymm0,%ymm8,%ymm9
3153 197,253,118,192, //vpcmpeqd %ymm0,%ymm0,%ymm0
3154 197,189,254,192, //vpaddd %ymm0,%ymm8,%ymm0
3155 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
3156 72,173, //lods %ds:(%rsi),%rax
3157 255,224, //jmpq *%rax
3158};
3159
3160CODE const uint8_t sk_repeat_y_hsw[] = {
3161 72,173, //lods %ds:(%rsi),%rax
3162 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3163 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
3164 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
3165 196,98,61,172,201, //vfnmadd213ps %ymm1,%ymm8,%ymm9
3166 197,245,118,201, //vpcmpeqd %ymm1,%ymm1,%ymm1
3167 197,189,254,201, //vpaddd %ymm1,%ymm8,%ymm1
3168 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
3169 72,173, //lods %ds:(%rsi),%rax
3170 255,224, //jmpq *%rax
3171};
3172
3173CODE const uint8_t sk_mirror_x_hsw[] = {
3174 72,173, //lods %ds:(%rsi),%rax
3175 197,122,16,0, //vmovss (%rax),%xmm8
3176 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
3177 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
3178 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
3179 196,226,125,24,192, //vbroadcastss %xmm0,%ymm0
3180 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
3181 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
3182 196,66,125,172,194, //vfnmadd213ps %ymm10,%ymm0,%ymm8
3183 196,193,60,92,193, //vsubps %ymm9,%ymm8,%ymm0
3184 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3185 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
3186 197,188,84,192, //vandps %ymm0,%ymm8,%ymm0
3187 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
3188 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
3189 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
3190 72,173, //lods %ds:(%rsi),%rax
3191 255,224, //jmpq *%rax
3192};
3193
3194CODE const uint8_t sk_mirror_y_hsw[] = {
3195 72,173, //lods %ds:(%rsi),%rax
3196 197,122,16,0, //vmovss (%rax),%xmm8
3197 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
3198 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
3199 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
3200 196,226,125,24,201, //vbroadcastss %xmm1,%ymm1
3201 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
3202 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
3203 196,66,117,172,194, //vfnmadd213ps %ymm10,%ymm1,%ymm8
3204 196,193,60,92,201, //vsubps %ymm9,%ymm8,%ymm1
3205 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3206 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
3207 197,188,84,201, //vandps %ymm1,%ymm8,%ymm1
3208 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
3209 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
3210 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
3211 72,173, //lods %ds:(%rsi),%rax
3212 255,224, //jmpq *%rax
3213};
3214
Mike Kleine9ed07d2017-03-07 12:28:11 -05003215CODE const uint8_t sk_luminance_to_alpha_hsw[] = {
Mike Klein5224f462017-03-07 17:29:54 -05003216 184,208,179,89,62, //mov $0x3e59b3d0,%eax
3217 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07003218 196,98,125,88,195, //vpbroadcastd %xmm3,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05003219 184,89,23,55,63, //mov $0x3f371759,%eax
3220 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07003221 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05003222 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
3223 196,98,125,168,193, //vfmadd213ps %ymm1,%ymm0,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05003224 184,152,221,147,61, //mov $0x3d93dd98,%eax
3225 197,249,110,192, //vmovd %eax,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07003226 196,226,125,88,216, //vpbroadcastd %xmm0,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05003227 196,194,109,168,216, //vfmadd213ps %ymm8,%ymm2,%ymm3
3228 72,173, //lods %ds:(%rsi),%rax
Mike Klein64b97482017-03-14 17:35:04 -07003229 197,253,239,192, //vpxor %ymm0,%ymm0,%ymm0
Mike Kleine9ed07d2017-03-07 12:28:11 -05003230 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3231 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3232 255,224, //jmpq *%rax
3233};
3234
Mike Klein894d5612017-03-07 07:59:52 -05003235CODE const uint8_t sk_matrix_2x3_hsw[] = {
3236 72,173, //lods %ds:(%rsi),%rax
3237 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
3238 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
3239 196,98,125,24,64,16, //vbroadcastss 0x10(%rax),%ymm8
3240 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
3241 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
3242 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
3243 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
3244 196,98,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm9
3245 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
3246 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
3247 72,173, //lods %ds:(%rsi),%rax
3248 197,124,41,192, //vmovaps %ymm8,%ymm0
3249 197,124,41,201, //vmovaps %ymm9,%ymm1
3250 255,224, //jmpq *%rax
3251};
3252
3253CODE const uint8_t sk_matrix_3x4_hsw[] = {
3254 72,173, //lods %ds:(%rsi),%rax
3255 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
3256 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
3257 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
3258 196,98,125,24,64,36, //vbroadcastss 0x24(%rax),%ymm8
3259 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
3260 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
3261 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
3262 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
3263 196,98,125,24,88,16, //vbroadcastss 0x10(%rax),%ymm11
3264 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
3265 196,98,125,24,72,40, //vbroadcastss 0x28(%rax),%ymm9
3266 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
3267 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
3268 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
3269 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
3270 196,98,125,24,96,20, //vbroadcastss 0x14(%rax),%ymm12
3271 196,98,125,24,104,32, //vbroadcastss 0x20(%rax),%ymm13
3272 196,98,125,24,80,44, //vbroadcastss 0x2c(%rax),%ymm10
3273 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
3274 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
3275 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
3276 72,173, //lods %ds:(%rsi),%rax
3277 197,124,41,192, //vmovaps %ymm8,%ymm0
3278 197,124,41,201, //vmovaps %ymm9,%ymm1
3279 197,124,41,210, //vmovaps %ymm10,%ymm2
3280 255,224, //jmpq *%rax
3281};
3282
Mike Kleine9ed07d2017-03-07 12:28:11 -05003283CODE const uint8_t sk_matrix_4x5_hsw[] = {
3284 72,173, //lods %ds:(%rsi),%rax
3285 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
3286 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
3287 196,98,125,24,88,32, //vbroadcastss 0x20(%rax),%ymm11
3288 196,98,125,24,96,48, //vbroadcastss 0x30(%rax),%ymm12
3289 196,98,125,24,64,64, //vbroadcastss 0x40(%rax),%ymm8
3290 196,66,101,184,196, //vfmadd231ps %ymm12,%ymm3,%ymm8
3291 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
3292 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
3293 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
3294 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
3295 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
3296 196,98,125,24,96,36, //vbroadcastss 0x24(%rax),%ymm12
3297 196,98,125,24,104,52, //vbroadcastss 0x34(%rax),%ymm13
3298 196,98,125,24,72,68, //vbroadcastss 0x44(%rax),%ymm9
3299 196,66,101,184,205, //vfmadd231ps %ymm13,%ymm3,%ymm9
3300 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
3301 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
3302 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
3303 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
3304 196,98,125,24,96,24, //vbroadcastss 0x18(%rax),%ymm12
3305 196,98,125,24,104,40, //vbroadcastss 0x28(%rax),%ymm13
3306 196,98,125,24,112,56, //vbroadcastss 0x38(%rax),%ymm14
3307 196,98,125,24,80,72, //vbroadcastss 0x48(%rax),%ymm10
3308 196,66,101,184,214, //vfmadd231ps %ymm14,%ymm3,%ymm10
3309 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
3310 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
3311 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
3312 196,98,125,24,96,12, //vbroadcastss 0xc(%rax),%ymm12
3313 196,98,125,24,104,28, //vbroadcastss 0x1c(%rax),%ymm13
3314 196,98,125,24,112,44, //vbroadcastss 0x2c(%rax),%ymm14
3315 196,98,125,24,120,60, //vbroadcastss 0x3c(%rax),%ymm15
3316 196,98,125,24,88,76, //vbroadcastss 0x4c(%rax),%ymm11
3317 196,66,101,184,223, //vfmadd231ps %ymm15,%ymm3,%ymm11
3318 196,66,109,184,222, //vfmadd231ps %ymm14,%ymm2,%ymm11
3319 196,66,117,184,221, //vfmadd231ps %ymm13,%ymm1,%ymm11
3320 196,66,125,184,220, //vfmadd231ps %ymm12,%ymm0,%ymm11
3321 72,173, //lods %ds:(%rsi),%rax
3322 197,124,41,192, //vmovaps %ymm8,%ymm0
3323 197,124,41,201, //vmovaps %ymm9,%ymm1
3324 197,124,41,210, //vmovaps %ymm10,%ymm2
3325 197,124,41,219, //vmovaps %ymm11,%ymm3
3326 255,224, //jmpq *%rax
3327};
3328
Mike Klein894d5612017-03-07 07:59:52 -05003329CODE const uint8_t sk_matrix_perspective_hsw[] = {
3330 72,173, //lods %ds:(%rsi),%rax
3331 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3332 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
3333 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
3334 196,66,117,184,209, //vfmadd231ps %ymm9,%ymm1,%ymm10
3335 196,66,125,184,208, //vfmadd231ps %ymm8,%ymm0,%ymm10
3336 196,98,125,24,64,12, //vbroadcastss 0xc(%rax),%ymm8
3337 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
3338 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
3339 196,66,117,184,217, //vfmadd231ps %ymm9,%ymm1,%ymm11
3340 196,66,125,184,216, //vfmadd231ps %ymm8,%ymm0,%ymm11
3341 196,98,125,24,64,24, //vbroadcastss 0x18(%rax),%ymm8
3342 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
3343 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
3344 196,66,117,184,225, //vfmadd231ps %ymm9,%ymm1,%ymm12
3345 196,66,125,184,224, //vfmadd231ps %ymm8,%ymm0,%ymm12
3346 196,193,124,83,204, //vrcpps %ymm12,%ymm1
3347 197,172,89,193, //vmulps %ymm1,%ymm10,%ymm0
3348 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
3349 72,173, //lods %ds:(%rsi),%rax
3350 255,224, //jmpq *%rax
3351};
3352
3353CODE const uint8_t sk_linear_gradient_2stops_hsw[] = {
3354 72,173, //lods %ds:(%rsi),%rax
3355 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
3356 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3357 196,98,125,184,193, //vfmadd231ps %ymm1,%ymm0,%ymm8
3358 196,226,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm2
3359 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
3360 196,226,125,184,202, //vfmadd231ps %ymm2,%ymm0,%ymm1
3361 196,226,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm3
3362 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
3363 196,226,125,184,211, //vfmadd231ps %ymm3,%ymm0,%ymm2
3364 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
3365 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
3366 196,194,125,184,217, //vfmadd231ps %ymm9,%ymm0,%ymm3
3367 72,173, //lods %ds:(%rsi),%rax
3368 197,124,41,192, //vmovaps %ymm8,%ymm0
3369 255,224, //jmpq *%rax
3370};
3371
3372CODE const uint8_t sk_start_pipeline_avx[] = {
3373 65,87, //push %r15
3374 65,86, //push %r14
3375 65,85, //push %r13
3376 65,84, //push %r12
3377 83, //push %rbx
3378 73,137,205, //mov %rcx,%r13
3379 73,137,214, //mov %rdx,%r14
3380 72,137,251, //mov %rdi,%rbx
3381 72,173, //lods %ds:(%rsi),%rax
3382 73,137,199, //mov %rax,%r15
3383 73,137,244, //mov %rsi,%r12
3384 72,141,67,8, //lea 0x8(%rbx),%rax
3385 76,57,232, //cmp %r13,%rax
3386 118,5, //jbe 28 <_sk_start_pipeline_avx+0x28>
3387 72,137,223, //mov %rbx,%rdi
3388 235,65, //jmp 69 <_sk_start_pipeline_avx+0x69>
3389 185,0,0,0,0, //mov $0x0,%ecx
3390 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3391 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3392 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3393 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3394 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
3395 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
3396 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
3397 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
3398 72,137,223, //mov %rbx,%rdi
3399 76,137,230, //mov %r12,%rsi
3400 76,137,242, //mov %r14,%rdx
3401 65,255,215, //callq *%r15
3402 72,141,123,8, //lea 0x8(%rbx),%rdi
3403 72,131,195,16, //add $0x10,%rbx
3404 76,57,235, //cmp %r13,%rbx
3405 72,137,251, //mov %rdi,%rbx
3406 118,191, //jbe 28 <_sk_start_pipeline_avx+0x28>
3407 76,137,233, //mov %r13,%rcx
3408 72,41,249, //sub %rdi,%rcx
3409 116,41, //je 9a <_sk_start_pipeline_avx+0x9a>
3410 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3411 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3412 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3413 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3414 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
3415 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
3416 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
3417 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
3418 76,137,230, //mov %r12,%rsi
3419 76,137,242, //mov %r14,%rdx
3420 65,255,215, //callq *%r15
3421 76,137,232, //mov %r13,%rax
3422 91, //pop %rbx
3423 65,92, //pop %r12
3424 65,93, //pop %r13
3425 65,94, //pop %r14
3426 65,95, //pop %r15
3427 197,248,119, //vzeroupper
3428 195, //retq
3429};
3430
3431CODE const uint8_t sk_just_return_avx[] = {
3432 195, //retq
3433};
3434
3435CODE const uint8_t sk_seed_shader_avx[] = {
3436 72,173, //lods %ds:(%rsi),%rax
3437 197,249,110,199, //vmovd %edi,%xmm0
3438 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
3439 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
3440 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003441 65,184,0,0,0,63, //mov $0x3f000000,%r8d
3442 196,193,121,110,200, //vmovd %r8d,%xmm1
3443 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
3444 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05003445 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05003446 197,252,88,2, //vaddps (%rdx),%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05003447 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
3448 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
3449 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003450 184,0,0,128,63, //mov $0x3f800000,%eax
3451 197,249,110,208, //vmovd %eax,%xmm2
3452 196,227,121,4,210,0, //vpermilps $0x0,%xmm2,%xmm2
3453 196,227,109,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05003454 72,173, //lods %ds:(%rsi),%rax
3455 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3456 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
3457 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
3458 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
3459 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
3460 255,224, //jmpq *%rax
3461};
3462
3463CODE const uint8_t sk_constant_color_avx[] = {
3464 72,173, //lods %ds:(%rsi),%rax
3465 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
3466 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
3467 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
3468 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
3469 72,173, //lods %ds:(%rsi),%rax
3470 255,224, //jmpq *%rax
3471};
3472
3473CODE const uint8_t sk_clear_avx[] = {
3474 72,173, //lods %ds:(%rsi),%rax
3475 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3476 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3477 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3478 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3479 255,224, //jmpq *%rax
3480};
3481
3482CODE const uint8_t sk_plus__avx[] = {
3483 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3484 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3485 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3486 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3487 72,173, //lods %ds:(%rsi),%rax
3488 255,224, //jmpq *%rax
3489};
3490
3491CODE const uint8_t sk_srcover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003492 184,0,0,128,63, //mov $0x3f800000,%eax
3493 197,121,110,192, //vmovd %eax,%xmm8
3494 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3495 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003496 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
3497 197,60,89,204, //vmulps %ymm4,%ymm8,%ymm9
3498 197,180,88,192, //vaddps %ymm0,%ymm9,%ymm0
3499 197,60,89,205, //vmulps %ymm5,%ymm8,%ymm9
3500 197,180,88,201, //vaddps %ymm1,%ymm9,%ymm1
3501 197,60,89,206, //vmulps %ymm6,%ymm8,%ymm9
3502 197,180,88,210, //vaddps %ymm2,%ymm9,%ymm2
3503 197,60,89,199, //vmulps %ymm7,%ymm8,%ymm8
3504 197,188,88,219, //vaddps %ymm3,%ymm8,%ymm3
3505 72,173, //lods %ds:(%rsi),%rax
3506 255,224, //jmpq *%rax
3507};
3508
3509CODE const uint8_t sk_dstover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003510 184,0,0,128,63, //mov $0x3f800000,%eax
3511 197,121,110,192, //vmovd %eax,%xmm8
3512 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3513 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003514 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
3515 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3516 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3517 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3518 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3519 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3520 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3521 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
3522 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3523 72,173, //lods %ds:(%rsi),%rax
3524 255,224, //jmpq *%rax
3525};
3526
3527CODE const uint8_t sk_clamp_0_avx[] = {
3528 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3529 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
3530 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
3531 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
3532 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
3533 72,173, //lods %ds:(%rsi),%rax
3534 255,224, //jmpq *%rax
3535};
3536
3537CODE const uint8_t sk_clamp_1_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003538 184,0,0,128,63, //mov $0x3f800000,%eax
3539 197,121,110,192, //vmovd %eax,%xmm8
3540 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3541 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003542 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
3543 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
3544 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
3545 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
3546 72,173, //lods %ds:(%rsi),%rax
3547 255,224, //jmpq *%rax
3548};
3549
3550CODE const uint8_t sk_clamp_a_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003551 184,0,0,128,63, //mov $0x3f800000,%eax
3552 197,121,110,192, //vmovd %eax,%xmm8
3553 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3554 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003555 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
3556 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
3557 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
3558 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
3559 72,173, //lods %ds:(%rsi),%rax
3560 255,224, //jmpq *%rax
3561};
3562
3563CODE const uint8_t sk_set_rgb_avx[] = {
3564 72,173, //lods %ds:(%rsi),%rax
3565 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
3566 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
3567 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
3568 72,173, //lods %ds:(%rsi),%rax
3569 255,224, //jmpq *%rax
3570};
3571
3572CODE const uint8_t sk_swap_rb_avx[] = {
3573 197,124,40,192, //vmovaps %ymm0,%ymm8
3574 72,173, //lods %ds:(%rsi),%rax
3575 197,252,40,194, //vmovaps %ymm2,%ymm0
3576 197,124,41,194, //vmovaps %ymm8,%ymm2
3577 255,224, //jmpq *%rax
3578};
3579
3580CODE const uint8_t sk_swap_avx[] = {
3581 197,124,40,195, //vmovaps %ymm3,%ymm8
3582 197,124,40,202, //vmovaps %ymm2,%ymm9
3583 197,124,40,209, //vmovaps %ymm1,%ymm10
3584 197,124,40,216, //vmovaps %ymm0,%ymm11
3585 72,173, //lods %ds:(%rsi),%rax
3586 197,252,40,196, //vmovaps %ymm4,%ymm0
3587 197,252,40,205, //vmovaps %ymm5,%ymm1
3588 197,252,40,214, //vmovaps %ymm6,%ymm2
3589 197,252,40,223, //vmovaps %ymm7,%ymm3
3590 197,124,41,220, //vmovaps %ymm11,%ymm4
3591 197,124,41,213, //vmovaps %ymm10,%ymm5
3592 197,124,41,206, //vmovaps %ymm9,%ymm6
3593 197,124,41,199, //vmovaps %ymm8,%ymm7
3594 255,224, //jmpq *%rax
3595};
3596
3597CODE const uint8_t sk_move_src_dst_avx[] = {
3598 72,173, //lods %ds:(%rsi),%rax
3599 197,252,40,224, //vmovaps %ymm0,%ymm4
3600 197,252,40,233, //vmovaps %ymm1,%ymm5
3601 197,252,40,242, //vmovaps %ymm2,%ymm6
3602 197,252,40,251, //vmovaps %ymm3,%ymm7
3603 255,224, //jmpq *%rax
3604};
3605
3606CODE const uint8_t sk_move_dst_src_avx[] = {
3607 72,173, //lods %ds:(%rsi),%rax
3608 197,252,40,196, //vmovaps %ymm4,%ymm0
3609 197,252,40,205, //vmovaps %ymm5,%ymm1
3610 197,252,40,214, //vmovaps %ymm6,%ymm2
3611 197,252,40,223, //vmovaps %ymm7,%ymm3
3612 255,224, //jmpq *%rax
3613};
3614
3615CODE const uint8_t sk_premul_avx[] = {
3616 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
3617 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
3618 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
3619 72,173, //lods %ds:(%rsi),%rax
3620 255,224, //jmpq *%rax
3621};
3622
3623CODE const uint8_t sk_unpremul_avx[] = {
3624 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3625 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003626 184,0,0,128,63, //mov $0x3f800000,%eax
3627 197,121,110,208, //vmovd %eax,%xmm10
3628 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
3629 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05003630 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
3631 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
3632 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3633 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3634 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3635 72,173, //lods %ds:(%rsi),%rax
3636 255,224, //jmpq *%rax
3637};
3638
3639CODE const uint8_t sk_from_srgb_avx[] = {
Mike Klein5224f462017-03-07 17:29:54 -05003640 184,145,131,158,61, //mov $0x3d9e8391,%eax
3641 197,121,110,192, //vmovd %eax,%xmm8
3642 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3643 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003644 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
3645 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05003646 184,154,153,153,62, //mov $0x3e99999a,%eax
3647 197,121,110,216, //vmovd %eax,%xmm11
3648 196,67,121,4,219,0, //vpermilps $0x0,%xmm11,%xmm11
3649 196,67,37,24,219,1, //vinsertf128 $0x1,%xmm11,%ymm11,%ymm11
3650 184,92,143,50,63, //mov $0x3f328f5c,%eax
3651 197,121,110,224, //vmovd %eax,%xmm12
3652 196,67,121,4,228,0, //vpermilps $0x0,%xmm12,%xmm12
3653 196,67,29,24,228,1, //vinsertf128 $0x1,%xmm12,%ymm12,%ymm12
Mike Klein894d5612017-03-07 07:59:52 -05003654 197,36,89,232, //vmulps %ymm0,%ymm11,%ymm13
3655 196,65,20,88,236, //vaddps %ymm12,%ymm13,%ymm13
Mike Klein5224f462017-03-07 17:29:54 -05003656 184,10,215,35,59, //mov $0x3b23d70a,%eax
3657 197,121,110,240, //vmovd %eax,%xmm14
3658 196,67,121,4,246,0, //vpermilps $0x0,%xmm14,%xmm14
3659 196,67,13,24,246,1, //vinsertf128 $0x1,%xmm14,%ymm14,%ymm14
Mike Klein894d5612017-03-07 07:59:52 -05003660 196,65,44,89,213, //vmulps %ymm13,%ymm10,%ymm10
3661 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05003662 184,174,71,97,61, //mov $0x3d6147ae,%eax
3663 197,121,110,232, //vmovd %eax,%xmm13
3664 196,67,121,4,237,0, //vpermilps $0x0,%xmm13,%xmm13
3665 196,67,21,24,237,1, //vinsertf128 $0x1,%xmm13,%ymm13,%ymm13
Mike Klein894d5612017-03-07 07:59:52 -05003666 196,193,124,194,197,1, //vcmpltps %ymm13,%ymm0,%ymm0
3667 196,195,45,74,193,0, //vblendvps %ymm0,%ymm9,%ymm10,%ymm0
3668 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
3669 197,116,89,209, //vmulps %ymm1,%ymm1,%ymm10
3670 197,36,89,249, //vmulps %ymm1,%ymm11,%ymm15
Mike Klein5224f462017-03-07 17:29:54 -05003671 196,65,28,88,255, //vaddps %ymm15,%ymm12,%ymm15
Mike Klein894d5612017-03-07 07:59:52 -05003672 196,65,44,89,215, //vmulps %ymm15,%ymm10,%ymm10
3673 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
3674 196,193,116,194,205,1, //vcmpltps %ymm13,%ymm1,%ymm1
3675 196,195,45,74,201,16, //vblendvps %ymm1,%ymm9,%ymm10,%ymm1
3676 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
3677 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
3678 197,36,89,210, //vmulps %ymm2,%ymm11,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05003679 196,65,28,88,210, //vaddps %ymm10,%ymm12,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05003680 196,65,52,89,202, //vmulps %ymm10,%ymm9,%ymm9
3681 196,65,12,88,201, //vaddps %ymm9,%ymm14,%ymm9
3682 196,193,108,194,213,1, //vcmpltps %ymm13,%ymm2,%ymm2
3683 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
3684 72,173, //lods %ds:(%rsi),%rax
3685 255,224, //jmpq *%rax
3686};
3687
3688CODE const uint8_t sk_to_srgb_avx[] = {
3689 197,124,82,192, //vrsqrtps %ymm0,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05003690 196,65,124,83,232, //vrcpps %ymm8,%ymm13
3691 196,65,124,82,240, //vrsqrtps %ymm8,%ymm14
3692 184,41,92,71,65, //mov $0x41475c29,%eax
3693 197,121,110,192, //vmovd %eax,%xmm8
3694 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3695 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
3696 197,60,89,224, //vmulps %ymm0,%ymm8,%ymm12
3697 184,0,0,128,63, //mov $0x3f800000,%eax
3698 197,121,110,200, //vmovd %eax,%xmm9
3699 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
3700 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
3701 184,194,135,210,62, //mov $0x3ed287c2,%eax
3702 197,121,110,208, //vmovd %eax,%xmm10
3703 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
3704 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
3705 184,206,111,48,63, //mov $0x3f306fce,%eax
3706 197,121,110,216, //vmovd %eax,%xmm11
3707 196,67,121,4,219,0, //vpermilps $0x0,%xmm11,%xmm11
3708 196,67,37,24,219,1, //vinsertf128 $0x1,%xmm11,%ymm11,%ymm11
3709 184,168,87,202,61, //mov $0x3dca57a8,%eax
3710 53,0,0,0,128, //xor $0x80000000,%eax
3711 197,121,110,248, //vmovd %eax,%xmm15
3712 196,67,121,4,255,0, //vpermilps $0x0,%xmm15,%xmm15
3713 196,67,5,24,255,1, //vinsertf128 $0x1,%xmm15,%ymm15,%ymm15
3714 196,65,20,89,235, //vmulps %ymm11,%ymm13,%ymm13
3715 196,65,20,88,239, //vaddps %ymm15,%ymm13,%ymm13
3716 196,65,12,89,242, //vmulps %ymm10,%ymm14,%ymm14
3717 196,65,12,88,237, //vaddps %ymm13,%ymm14,%ymm13
3718 196,65,52,93,237, //vminps %ymm13,%ymm9,%ymm13
3719 184,4,231,140,59, //mov $0x3b8ce704,%eax
3720 197,121,110,240, //vmovd %eax,%xmm14
3721 196,67,121,4,246,0, //vpermilps $0x0,%xmm14,%xmm14
3722 196,67,13,24,246,1, //vinsertf128 $0x1,%xmm14,%ymm14,%ymm14
3723 196,193,124,194,198,1, //vcmpltps %ymm14,%ymm0,%ymm0
3724 196,195,21,74,196,0, //vblendvps %ymm0,%ymm12,%ymm13,%ymm0
3725 197,124,82,225, //vrsqrtps %ymm1,%ymm12
3726 196,65,124,83,236, //vrcpps %ymm12,%ymm13
3727 196,65,124,82,228, //vrsqrtps %ymm12,%ymm12
3728 196,65,36,89,237, //vmulps %ymm13,%ymm11,%ymm13
3729 196,65,4,88,237, //vaddps %ymm13,%ymm15,%ymm13
3730 196,65,44,89,228, //vmulps %ymm12,%ymm10,%ymm12
3731 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
3732 197,60,89,233, //vmulps %ymm1,%ymm8,%ymm13
3733 196,65,52,93,228, //vminps %ymm12,%ymm9,%ymm12
3734 196,193,116,194,206,1, //vcmpltps %ymm14,%ymm1,%ymm1
3735 196,195,29,74,205,16, //vblendvps %ymm1,%ymm13,%ymm12,%ymm1
3736 197,124,82,226, //vrsqrtps %ymm2,%ymm12
3737 196,65,124,83,236, //vrcpps %ymm12,%ymm13
3738 196,65,36,89,221, //vmulps %ymm13,%ymm11,%ymm11
Mike Klein894d5612017-03-07 07:59:52 -05003739 196,65,4,88,219, //vaddps %ymm11,%ymm15,%ymm11
Mike Klein5224f462017-03-07 17:29:54 -05003740 196,65,124,82,228, //vrsqrtps %ymm12,%ymm12
3741 196,65,44,89,212, //vmulps %ymm12,%ymm10,%ymm10
3742 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
3743 196,65,52,93,202, //vminps %ymm10,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05003744 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05003745 196,193,108,194,214,1, //vcmpltps %ymm14,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05003746 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
3747 72,173, //lods %ds:(%rsi),%rax
3748 255,224, //jmpq *%rax
3749};
3750
3751CODE const uint8_t sk_scale_1_float_avx[] = {
3752 72,173, //lods %ds:(%rsi),%rax
3753 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3754 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3755 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3756 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3757 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
3758 72,173, //lods %ds:(%rsi),%rax
3759 255,224, //jmpq *%rax
3760};
3761
3762CODE const uint8_t sk_scale_u8_avx[] = {
3763 73,137,200, //mov %rcx,%r8
3764 72,173, //lods %ds:(%rsi),%rax
3765 72,139,0, //mov (%rax),%rax
3766 72,1,248, //add %rdi,%rax
3767 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05003768 117,80, //jne 5a2 <_sk_scale_u8_avx+0x60>
Mike Klein64b97482017-03-14 17:35:04 -07003769 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05003770 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
3771 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
3772 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
3773 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
3774 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003775 184,129,128,128,59, //mov $0x3b808081,%eax
3776 197,121,110,200, //vmovd %eax,%xmm9
3777 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
3778 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05003779 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
3780 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3781 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3782 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3783 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
3784 72,173, //lods %ds:(%rsi),%rax
3785 76,137,193, //mov %r8,%rcx
3786 255,224, //jmpq *%rax
3787 49,201, //xor %ecx,%ecx
3788 77,137,194, //mov %r8,%r10
3789 69,49,201, //xor %r9d,%r9d
3790 68,15,182,24, //movzbl (%rax),%r11d
3791 72,255,192, //inc %rax
3792 73,211,227, //shl %cl,%r11
3793 77,9,217, //or %r11,%r9
3794 72,131,193,8, //add $0x8,%rcx
3795 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05003796 117,234, //jne 5aa <_sk_scale_u8_avx+0x68>
Mike Klein894d5612017-03-07 07:59:52 -05003797 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05003798 235,143, //jmp 556 <_sk_scale_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05003799};
3800
3801CODE const uint8_t sk_lerp_1_float_avx[] = {
3802 72,173, //lods %ds:(%rsi),%rax
3803 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3804 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
3805 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
3806 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3807 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
3808 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
3809 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3810 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
3811 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
3812 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3813 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
3814 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
3815 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3816 72,173, //lods %ds:(%rsi),%rax
3817 255,224, //jmpq *%rax
3818};
3819
3820CODE const uint8_t sk_lerp_u8_avx[] = {
3821 73,137,200, //mov %rcx,%r8
3822 72,173, //lods %ds:(%rsi),%rax
3823 72,139,0, //mov (%rax),%rax
3824 72,1,248, //add %rdi,%rax
3825 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05003826 117,116, //jne 68a <_sk_lerp_u8_avx+0x84>
Mike Klein64b97482017-03-14 17:35:04 -07003827 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05003828 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
3829 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
3830 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
3831 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
3832 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003833 184,129,128,128,59, //mov $0x3b808081,%eax
3834 197,121,110,200, //vmovd %eax,%xmm9
3835 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
3836 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05003837 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
3838 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
3839 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
3840 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3841 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
3842 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
3843 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3844 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
3845 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
3846 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3847 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
3848 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
3849 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3850 72,173, //lods %ds:(%rsi),%rax
3851 76,137,193, //mov %r8,%rcx
3852 255,224, //jmpq *%rax
3853 49,201, //xor %ecx,%ecx
3854 77,137,194, //mov %r8,%r10
3855 69,49,201, //xor %r9d,%r9d
3856 68,15,182,24, //movzbl (%rax),%r11d
3857 72,255,192, //inc %rax
3858 73,211,227, //shl %cl,%r11
3859 77,9,217, //or %r11,%r9
3860 72,131,193,8, //add $0x8,%rcx
3861 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05003862 117,234, //jne 692 <_sk_lerp_u8_avx+0x8c>
Mike Klein894d5612017-03-07 07:59:52 -05003863 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05003864 233,104,255,255,255, //jmpq 61a <_sk_lerp_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05003865};
3866
3867CODE const uint8_t sk_lerp_565_avx[] = {
3868 72,173, //lods %ds:(%rsi),%rax
3869 76,139,16, //mov (%rax),%r10
3870 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05003871 15,133,250,0,0,0, //jne 7ba <_sk_lerp_565_avx+0x108>
Mike Klein894d5612017-03-07 07:59:52 -05003872 196,65,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm8
3873 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
3874 197,185,105,219, //vpunpckhwd %xmm3,%xmm8,%xmm3
3875 196,66,121,51,192, //vpmovzxwd %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05003876 196,99,61,24,195,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm8
3877 184,0,248,0,0, //mov $0xf800,%eax
3878 197,249,110,216, //vmovd %eax,%xmm3
3879 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
3880 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
3881 196,193,100,84,216, //vandps %ymm8,%ymm3,%ymm3
3882 197,124,91,203, //vcvtdq2ps %ymm3,%ymm9
3883 184,8,33,132,55, //mov $0x37842108,%eax
3884 197,249,110,216, //vmovd %eax,%xmm3
3885 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
3886 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
3887 197,52,89,203, //vmulps %ymm3,%ymm9,%ymm9
3888 184,224,7,0,0, //mov $0x7e0,%eax
3889 197,249,110,216, //vmovd %eax,%xmm3
3890 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
3891 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
3892 196,193,100,84,216, //vandps %ymm8,%ymm3,%ymm3
3893 197,124,91,211, //vcvtdq2ps %ymm3,%ymm10
3894 184,33,8,2,58, //mov $0x3a020821,%eax
3895 197,249,110,216, //vmovd %eax,%xmm3
3896 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
3897 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
3898 197,44,89,211, //vmulps %ymm3,%ymm10,%ymm10
3899 184,31,0,0,0, //mov $0x1f,%eax
3900 197,249,110,216, //vmovd %eax,%xmm3
3901 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
3902 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
3903 196,193,100,84,216, //vandps %ymm8,%ymm3,%ymm3
3904 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
3905 184,8,33,4,61, //mov $0x3d042108,%eax
3906 197,249,110,216, //vmovd %eax,%xmm3
3907 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
3908 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
3909 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05003910 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05003911 196,193,124,89,193, //vmulps %ymm9,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05003912 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3913 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05003914 196,193,116,89,202, //vmulps %ymm10,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05003915 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3916 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
3917 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
3918 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003919 184,0,0,128,63, //mov $0x3f800000,%eax
3920 197,249,110,216, //vmovd %eax,%xmm3
3921 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
3922 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05003923 72,173, //lods %ds:(%rsi),%rax
3924 255,224, //jmpq *%rax
3925 65,137,200, //mov %ecx,%r8d
3926 65,128,224,7, //and $0x7,%r8b
3927 196,65,57,239,192, //vpxor %xmm8,%xmm8,%xmm8
3928 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05003929 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07003930 15,135,243,254,255,255, //ja 6c6 <_sk_lerp_565_avx+0x14>
3931 69,15,182,192, //movzbl %r8b,%r8d
Mike Klein5224f462017-03-07 17:29:54 -05003932 76,141,13,74,0,0,0, //lea 0x4a(%rip),%r9 # 828 <_sk_lerp_565_avx+0x176>
Mike Klein894d5612017-03-07 07:59:52 -05003933 75,99,4,129, //movslq (%r9,%r8,4),%rax
3934 76,1,200, //add %r9,%rax
3935 255,224, //jmpq *%rax
3936 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
3937 196,65,97,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm8
3938 196,65,57,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm8,%xmm8
3939 196,65,57,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm8,%xmm8
3940 196,65,57,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm8,%xmm8
3941 196,65,57,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
3942 196,65,57,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
3943 196,65,57,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05003944 233,159,254,255,255, //jmpq 6c6 <_sk_lerp_565_avx+0x14>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003945 144, //nop
3946 243,255, //repz (bad)
3947 255, //(bad)
3948 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05003949 235,255, //jmp 82d <_sk_lerp_565_avx+0x17b>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003950 255, //(bad)
3951 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05003952 255, //(bad)
3953 255, //(bad)
3954 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003955 219,255, //(bad)
3956 255, //(bad)
3957 255,211, //callq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05003958 255, //(bad)
3959 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003960 255,203, //dec %ebx
Mike Klein894d5612017-03-07 07:59:52 -05003961 255, //(bad)
3962 255, //(bad)
3963 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003964 191, //.byte 0xbf
Mike Klein894d5612017-03-07 07:59:52 -05003965 255, //(bad)
3966 255, //(bad)
3967 255, //.byte 0xff
3968};
3969
3970CODE const uint8_t sk_load_tables_avx[] = {
3971 85, //push %rbp
3972 65,87, //push %r15
3973 65,86, //push %r14
3974 65,85, //push %r13
3975 65,84, //push %r12
3976 83, //push %rbx
3977 72,173, //lods %ds:(%rsi),%rax
3978 76,139,0, //mov (%rax),%r8
3979 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05003980 15,133,56,2,0,0, //jne a94 <_sk_load_tables_avx+0x250>
Mike Klein894d5612017-03-07 07:59:52 -05003981 196,65,124,16,4,184, //vmovups (%r8,%rdi,4),%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05003982 187,255,0,0,0, //mov $0xff,%ebx
3983 197,249,110,195, //vmovd %ebx,%xmm0
3984 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
3985 196,99,125,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05003986 196,193,52,84,192, //vandps %ymm8,%ymm9,%ymm0
3987 196,193,249,126,193, //vmovq %xmm0,%r9
3988 69,137,203, //mov %r9d,%r11d
3989 196,195,249,22,194,1, //vpextrq $0x1,%xmm0,%r10
3990 69,137,214, //mov %r10d,%r14d
3991 73,193,234,32, //shr $0x20,%r10
3992 73,193,233,32, //shr $0x20,%r9
3993 196,227,125,25,192,1, //vextractf128 $0x1,%ymm0,%xmm0
3994 196,193,249,126,196, //vmovq %xmm0,%r12
3995 69,137,231, //mov %r12d,%r15d
3996 196,227,249,22,195,1, //vpextrq $0x1,%xmm0,%rbx
3997 65,137,221, //mov %ebx,%r13d
3998 72,193,235,32, //shr $0x20,%rbx
3999 73,193,236,32, //shr $0x20,%r12
4000 72,139,104,8, //mov 0x8(%rax),%rbp
4001 76,139,64,16, //mov 0x10(%rax),%r8
4002 196,161,122,16,68,189,0, //vmovss 0x0(%rbp,%r15,4),%xmm0
4003 196,163,121,33,68,165,0,16, //vinsertps $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05004004 196,161,122,16,76,173,0, //vmovss 0x0(%rbp,%r13,4),%xmm1
4005 196,227,121,33,193,32, //vinsertps $0x20,%xmm1,%xmm0,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05004006 197,250,16,76,157,0, //vmovss 0x0(%rbp,%rbx,4),%xmm1
4007 196,227,121,33,193,48, //vinsertps $0x30,%xmm1,%xmm0,%xmm0
4008 196,161,122,16,76,157,0, //vmovss 0x0(%rbp,%r11,4),%xmm1
4009 196,163,113,33,76,141,0,16, //vinsertps $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05004010 196,161,122,16,92,181,0, //vmovss 0x0(%rbp,%r14,4),%xmm3
4011 196,227,113,33,203,32, //vinsertps $0x20,%xmm3,%xmm1,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05004012 196,161,122,16,92,149,0, //vmovss 0x0(%rbp,%r10,4),%xmm3
4013 196,227,113,33,203,48, //vinsertps $0x30,%xmm3,%xmm1,%xmm1
4014 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
4015 196,193,113,114,208,8, //vpsrld $0x8,%xmm8,%xmm1
4016 196,67,125,25,194,1, //vextractf128 $0x1,%ymm8,%xmm10
4017 196,193,105,114,210,8, //vpsrld $0x8,%xmm10,%xmm2
4018 196,227,117,24,202,1, //vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
4019 197,180,84,201, //vandps %ymm1,%ymm9,%ymm1
4020 196,193,249,126,201, //vmovq %xmm1,%r9
4021 69,137,203, //mov %r9d,%r11d
4022 196,195,249,22,202,1, //vpextrq $0x1,%xmm1,%r10
4023 69,137,214, //mov %r10d,%r14d
4024 73,193,234,32, //shr $0x20,%r10
4025 73,193,233,32, //shr $0x20,%r9
4026 196,227,125,25,201,1, //vextractf128 $0x1,%ymm1,%xmm1
4027 196,225,249,126,205, //vmovq %xmm1,%rbp
4028 65,137,239, //mov %ebp,%r15d
4029 196,227,249,22,203,1, //vpextrq $0x1,%xmm1,%rbx
4030 65,137,220, //mov %ebx,%r12d
4031 72,193,235,32, //shr $0x20,%rbx
4032 72,193,237,32, //shr $0x20,%rbp
4033 196,129,122,16,12,184, //vmovss (%r8,%r15,4),%xmm1
4034 196,195,113,33,12,168,16, //vinsertps $0x10,(%r8,%rbp,4),%xmm1,%xmm1
4035 196,129,122,16,20,160, //vmovss (%r8,%r12,4),%xmm2
4036 196,227,113,33,202,32, //vinsertps $0x20,%xmm2,%xmm1,%xmm1
4037 196,193,122,16,20,152, //vmovss (%r8,%rbx,4),%xmm2
4038 196,227,113,33,202,48, //vinsertps $0x30,%xmm2,%xmm1,%xmm1
4039 196,129,122,16,20,152, //vmovss (%r8,%r11,4),%xmm2
4040 196,131,105,33,20,136,16, //vinsertps $0x10,(%r8,%r9,4),%xmm2,%xmm2
4041 196,129,122,16,28,176, //vmovss (%r8,%r14,4),%xmm3
4042 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
4043 196,129,122,16,28,144, //vmovss (%r8,%r10,4),%xmm3
4044 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
4045 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
4046 72,139,64,24, //mov 0x18(%rax),%rax
4047 196,193,105,114,208,16, //vpsrld $0x10,%xmm8,%xmm2
4048 196,193,97,114,210,16, //vpsrld $0x10,%xmm10,%xmm3
4049 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
4050 197,180,84,210, //vandps %ymm2,%ymm9,%ymm2
4051 196,193,249,126,208, //vmovq %xmm2,%r8
4052 69,137,194, //mov %r8d,%r10d
4053 196,195,249,22,209,1, //vpextrq $0x1,%xmm2,%r9
4054 69,137,203, //mov %r9d,%r11d
4055 73,193,233,32, //shr $0x20,%r9
4056 73,193,232,32, //shr $0x20,%r8
4057 196,227,125,25,210,1, //vextractf128 $0x1,%ymm2,%xmm2
4058 196,225,249,126,213, //vmovq %xmm2,%rbp
4059 65,137,238, //mov %ebp,%r14d
4060 196,227,249,22,211,1, //vpextrq $0x1,%xmm2,%rbx
4061 65,137,223, //mov %ebx,%r15d
4062 72,193,235,32, //shr $0x20,%rbx
4063 72,193,237,32, //shr $0x20,%rbp
4064 196,161,122,16,20,176, //vmovss (%rax,%r14,4),%xmm2
4065 196,227,105,33,20,168,16, //vinsertps $0x10,(%rax,%rbp,4),%xmm2,%xmm2
4066 196,161,122,16,28,184, //vmovss (%rax,%r15,4),%xmm3
4067 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
4068 197,250,16,28,152, //vmovss (%rax,%rbx,4),%xmm3
4069 196,99,105,33,203,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm9
4070 196,161,122,16,28,144, //vmovss (%rax,%r10,4),%xmm3
4071 196,163,97,33,28,128,16, //vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
4072 196,161,122,16,20,152, //vmovss (%rax,%r11,4),%xmm2
4073 196,227,97,33,210,32, //vinsertps $0x20,%xmm2,%xmm3,%xmm2
4074 196,161,122,16,28,136, //vmovss (%rax,%r9,4),%xmm3
4075 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
4076 196,195,109,24,209,1, //vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
4077 196,193,57,114,208,24, //vpsrld $0x18,%xmm8,%xmm8
4078 196,193,97,114,210,24, //vpsrld $0x18,%xmm10,%xmm3
4079 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05004080 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
4081 184,129,128,128,59, //mov $0x3b808081,%eax
4082 197,249,110,216, //vmovd %eax,%xmm3
4083 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
4084 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
4085 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05004086 72,173, //lods %ds:(%rsi),%rax
4087 91, //pop %rbx
4088 65,92, //pop %r12
4089 65,93, //pop %r13
4090 65,94, //pop %r14
4091 65,95, //pop %r15
4092 93, //pop %rbp
4093 255,224, //jmpq *%rax
Mike Klein5224f462017-03-07 17:29:54 -05004094 137,203, //mov %ecx,%ebx
4095 128,227,7, //and $0x7,%bl
Mike Klein894d5612017-03-07 07:59:52 -05004096 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05004097 254,203, //dec %bl
Mike Klein5224f462017-03-07 17:29:54 -05004098 128,251,6, //cmp $0x6,%bl
Mike Klein64b97482017-03-14 17:35:04 -07004099 15,135,185,253,255,255, //ja 862 <_sk_load_tables_avx+0x1e>
4100 15,182,219, //movzbl %bl,%ebx
Mike Klein5224f462017-03-07 17:29:54 -05004101 76,141,13,137,0,0,0, //lea 0x89(%rip),%r9 # b3c <_sk_load_tables_avx+0x2f8>
4102 73,99,28,153, //movslq (%r9,%rbx,4),%rbx
4103 76,1,203, //add %r9,%rbx
4104 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05004105 196,193,121,110,68,184,24, //vmovd 0x18(%r8,%rdi,4),%xmm0
4106 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
4107 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
4108 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
4109 196,99,117,12,192,64, //vblendps $0x40,%ymm0,%ymm1,%ymm8
4110 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
4111 196,195,121,34,68,184,20,1, //vpinsrd $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
4112 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
4113 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
4114 196,195,121,34,68,184,16,0, //vpinsrd $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
4115 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
4116 196,195,57,34,68,184,12,3, //vpinsrd $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
4117 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
4118 196,195,57,34,68,184,8,2, //vpinsrd $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
4119 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
4120 196,195,57,34,68,184,4,1, //vpinsrd $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
4121 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
4122 196,195,57,34,4,184,0, //vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
4123 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05004124 233,38,253,255,255, //jmpq 862 <_sk_load_tables_avx+0x1e>
Mike Klein894d5612017-03-07 07:59:52 -05004125 238, //out %al,(%dx)
4126 255, //(bad)
4127 255, //(bad)
4128 255,224, //jmpq *%rax
4129 255, //(bad)
4130 255, //(bad)
4131 255,210, //callq *%rdx
4132 255, //(bad)
4133 255, //(bad)
4134 255,196, //inc %esp
4135 255, //(bad)
4136 255, //(bad)
4137 255,176,255,255,255,156, //pushq -0x63000001(%rax)
4138 255, //(bad)
4139 255, //(bad)
4140 255, //.byte 0xff
4141 128,255,255, //cmp $0xff,%bh
4142 255, //.byte 0xff
4143};
4144
4145CODE const uint8_t sk_load_a8_avx[] = {
4146 73,137,200, //mov %rcx,%r8
4147 72,173, //lods %ds:(%rsi),%rax
4148 72,139,0, //mov (%rax),%rax
4149 72,1,248, //add %rdi,%rax
4150 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05004151 117,74, //jne bb2 <_sk_load_a8_avx+0x5a>
Mike Klein64b97482017-03-14 17:35:04 -07004152 197,250,126,0, //vmovq (%rax),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05004153 196,226,121,49,200, //vpmovzxbd %xmm0,%xmm1
4154 196,227,121,4,192,229, //vpermilps $0xe5,%xmm0,%xmm0
4155 196,226,121,49,192, //vpmovzxbd %xmm0,%xmm0
4156 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
4157 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05004158 184,129,128,128,59, //mov $0x3b808081,%eax
4159 197,249,110,200, //vmovd %eax,%xmm1
4160 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4161 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05004162 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
4163 72,173, //lods %ds:(%rsi),%rax
4164 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
4165 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
4166 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
4167 76,137,193, //mov %r8,%rcx
4168 255,224, //jmpq *%rax
4169 49,201, //xor %ecx,%ecx
4170 77,137,194, //mov %r8,%r10
4171 69,49,201, //xor %r9d,%r9d
4172 68,15,182,24, //movzbl (%rax),%r11d
4173 72,255,192, //inc %rax
4174 73,211,227, //shl %cl,%r11
4175 77,9,217, //or %r11,%r9
4176 72,131,193,8, //add $0x8,%rcx
4177 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05004178 117,234, //jne bba <_sk_load_a8_avx+0x62>
Mike Klein894d5612017-03-07 07:59:52 -05004179 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05004180 235,149, //jmp b6c <_sk_load_a8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05004181};
4182
4183CODE const uint8_t sk_store_a8_avx[] = {
4184 72,173, //lods %ds:(%rsi),%rax
4185 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05004186 184,0,0,127,67, //mov $0x437f0000,%eax
4187 197,121,110,192, //vmovd %eax,%xmm8
4188 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
4189 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05004190 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
4191 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
4192 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4193 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
4194 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
4195 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05004196 117,10, //jne c19 <_sk_store_a8_avx+0x42>
Mike Klein894d5612017-03-07 07:59:52 -05004197 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
4198 72,173, //lods %ds:(%rsi),%rax
4199 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07004200 65,137,200, //mov %ecx,%r8d
4201 65,128,224,7, //and $0x7,%r8b
4202 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05004203 65,128,248,6, //cmp $0x6,%r8b
Mike Klein5224f462017-03-07 17:29:54 -05004204 119,236, //ja c15 <_sk_store_a8_avx+0x3e>
Mike Klein894d5612017-03-07 07:59:52 -05004205 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07004206 65,15,182,192, //movzbl %r8b,%eax
4207 76,141,5,67,0,0,0, //lea 0x43(%rip),%r8 # c7c <_sk_store_a8_avx+0xa5>
4208 73,99,4,128, //movslq (%r8,%rax,4),%rax
4209 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05004210 255,224, //jmpq *%rax
4211 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
4212 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
4213 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
4214 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
4215 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
4216 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
4217 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Klein64b97482017-03-14 17:35:04 -07004218 235,154, //jmp c15 <_sk_store_a8_avx+0x3e>
Mike Klein5224f462017-03-07 17:29:54 -05004219 144, //nop
4220 246,255, //idiv %bh
4221 255, //(bad)
4222 255, //(bad)
4223 238, //out %al,(%dx)
4224 255, //(bad)
4225 255, //(bad)
4226 255,230, //jmpq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05004227 255, //(bad)
4228 255, //(bad)
4229 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05004230 222,255, //fdivrp %st,%st(7)
4231 255, //(bad)
4232 255,214, //callq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05004233 255, //(bad)
4234 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05004235 255,206, //dec %esi
Mike Klein894d5612017-03-07 07:59:52 -05004236 255, //(bad)
4237 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05004238 255,198, //inc %esi
Mike Klein894d5612017-03-07 07:59:52 -05004239 255, //(bad)
4240 255, //(bad)
4241 255, //.byte 0xff
4242};
4243
4244CODE const uint8_t sk_load_565_avx[] = {
4245 72,173, //lods %ds:(%rsi),%rax
4246 76,139,16, //mov (%rax),%r10
4247 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004248 15,133,209,0,0,0, //jne d77 <_sk_load_565_avx+0xdf>
Mike Klein894d5612017-03-07 07:59:52 -05004249 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
4250 197,241,239,201, //vpxor %xmm1,%xmm1,%xmm1
4251 197,249,105,201, //vpunpckhwd %xmm1,%xmm0,%xmm1
4252 196,226,121,51,192, //vpmovzxwd %xmm0,%xmm0
4253 196,227,125,24,209,1, //vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05004254 184,0,248,0,0, //mov $0xf800,%eax
4255 197,249,110,192, //vmovd %eax,%xmm0
4256 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
4257 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05004258 197,252,84,194, //vandps %ymm2,%ymm0,%ymm0
4259 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05004260 184,8,33,132,55, //mov $0x37842108,%eax
4261 197,249,110,200, //vmovd %eax,%xmm1
4262 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4263 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
4264 197,252,89,193, //vmulps %ymm1,%ymm0,%ymm0
4265 184,224,7,0,0, //mov $0x7e0,%eax
4266 197,249,110,200, //vmovd %eax,%xmm1
4267 197,249,112,201,0, //vpshufd $0x0,%xmm1,%xmm1
4268 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05004269 197,244,84,202, //vandps %ymm2,%ymm1,%ymm1
4270 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05004271 184,33,8,2,58, //mov $0x3a020821,%eax
4272 197,249,110,216, //vmovd %eax,%xmm3
4273 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
4274 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
4275 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
4276 184,31,0,0,0, //mov $0x1f,%eax
4277 197,249,110,216, //vmovd %eax,%xmm3
4278 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
4279 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05004280 197,228,84,210, //vandps %ymm2,%ymm3,%ymm2
4281 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05004282 184,8,33,4,61, //mov $0x3d042108,%eax
4283 197,249,110,216, //vmovd %eax,%xmm3
4284 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
4285 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
4286 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
4287 184,0,0,128,63, //mov $0x3f800000,%eax
4288 197,249,110,216, //vmovd %eax,%xmm3
4289 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
4290 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05004291 72,173, //lods %ds:(%rsi),%rax
4292 255,224, //jmpq *%rax
4293 65,137,200, //mov %ecx,%r8d
4294 65,128,224,7, //and $0x7,%r8b
4295 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
4296 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05004297 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07004298 15,135,29,255,255,255, //ja cac <_sk_load_565_avx+0x14>
4299 69,15,182,192, //movzbl %r8b,%r8d
4300 76,141,13,74,0,0,0, //lea 0x4a(%rip),%r9 # de4 <_sk_load_565_avx+0x14c>
Mike Klein894d5612017-03-07 07:59:52 -05004301 75,99,4,129, //movslq (%r9,%r8,4),%rax
4302 76,1,200, //add %r9,%rax
4303 255,224, //jmpq *%rax
4304 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
4305 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
4306 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
4307 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
4308 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
4309 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
4310 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
4311 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07004312 233,201,254,255,255, //jmpq cac <_sk_load_565_avx+0x14>
Mike Klein5224f462017-03-07 17:29:54 -05004313 144, //nop
4314 243,255, //repz (bad)
4315 255, //(bad)
4316 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004317 235,255, //jmp de9 <_sk_load_565_avx+0x151>
Mike Klein5224f462017-03-07 17:29:54 -05004318 255, //(bad)
4319 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05004320 255, //(bad)
4321 255, //(bad)
4322 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05004323 219,255, //(bad)
4324 255, //(bad)
4325 255,211, //callq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05004326 255, //(bad)
4327 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05004328 255,203, //dec %ebx
Mike Klein894d5612017-03-07 07:59:52 -05004329 255, //(bad)
4330 255, //(bad)
4331 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05004332 191, //.byte 0xbf
Mike Klein894d5612017-03-07 07:59:52 -05004333 255, //(bad)
4334 255, //(bad)
4335 255, //.byte 0xff
4336};
4337
4338CODE const uint8_t sk_store_565_avx[] = {
4339 72,173, //lods %ds:(%rsi),%rax
4340 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05004341 184,0,0,248,65, //mov $0x41f80000,%eax
4342 197,121,110,192, //vmovd %eax,%xmm8
4343 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
4344 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05004345 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
4346 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
4347 196,193,41,114,241,11, //vpslld $0xb,%xmm9,%xmm10
4348 196,67,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm9
4349 196,193,49,114,241,11, //vpslld $0xb,%xmm9,%xmm9
4350 196,67,45,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
Mike Klein5224f462017-03-07 17:29:54 -05004351 184,0,0,124,66, //mov $0x427c0000,%eax
4352 197,121,110,208, //vmovd %eax,%xmm10
4353 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
4354 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05004355 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
4356 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
4357 196,193,33,114,242,5, //vpslld $0x5,%xmm10,%xmm11
4358 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
4359 196,193,41,114,242,5, //vpslld $0x5,%xmm10,%xmm10
4360 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
4361 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
4362 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
4363 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
4364 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
4365 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4366 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
4367 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004368 117,10, //jne e9e <_sk_store_565_avx+0x9e>
Mike Klein894d5612017-03-07 07:59:52 -05004369 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
4370 72,173, //lods %ds:(%rsi),%rax
4371 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07004372 65,137,200, //mov %ecx,%r8d
4373 65,128,224,7, //and $0x7,%r8b
4374 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05004375 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07004376 119,236, //ja e9a <_sk_store_565_avx+0x9a>
4377 65,15,182,192, //movzbl %r8b,%eax
4378 76,141,5,67,0,0,0, //lea 0x43(%rip),%r8 # efc <_sk_store_565_avx+0xfc>
4379 73,99,4,128, //movslq (%r8,%rax,4),%rax
4380 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05004381 255,224, //jmpq *%rax
4382 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
4383 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
4384 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
4385 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
4386 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
4387 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
Mike Klein64b97482017-03-14 17:35:04 -07004388 196,67,121,21,4,121,0, //vpextrw $0x0,%xmm8,(%r9,%rdi,2)
4389 235,159, //jmp e9a <_sk_store_565_avx+0x9a>
4390 144, //nop
4391 246,255, //idiv %bh
Mike Klein894d5612017-03-07 07:59:52 -05004392 255, //(bad)
4393 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004394 238, //out %al,(%dx)
Mike Klein894d5612017-03-07 07:59:52 -05004395 255, //(bad)
4396 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004397 255,230, //jmpq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05004398 255, //(bad)
4399 255, //(bad)
4400 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004401 222,255, //fdivrp %st,%st(7)
Mike Klein894d5612017-03-07 07:59:52 -05004402 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004403 255,214, //callq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05004404 255, //(bad)
4405 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004406 255,206, //dec %esi
Mike Klein894d5612017-03-07 07:59:52 -05004407 255, //(bad)
4408 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07004409 255,198, //inc %esi
Mike Klein894d5612017-03-07 07:59:52 -05004410 255, //(bad)
4411 255, //(bad)
4412 255, //.byte 0xff
4413};
4414
4415CODE const uint8_t sk_load_8888_avx[] = {
4416 72,173, //lods %ds:(%rsi),%rax
4417 76,139,16, //mov (%rax),%r10
4418 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004419 15,133,157,0,0,0, //jne fc3 <_sk_load_8888_avx+0xab>
Mike Klein894d5612017-03-07 07:59:52 -05004420 196,65,124,16,12,186, //vmovups (%r10,%rdi,4),%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004421 184,255,0,0,0, //mov $0xff,%eax
4422 197,249,110,192, //vmovd %eax,%xmm0
4423 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
4424 196,99,125,24,216,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm11
Mike Klein894d5612017-03-07 07:59:52 -05004425 196,193,36,84,193, //vandps %ymm9,%ymm11,%ymm0
4426 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004427 184,129,128,128,59, //mov $0x3b808081,%eax
4428 197,249,110,200, //vmovd %eax,%xmm1
4429 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4430 196,99,117,24,193,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm8
4431 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05004432 196,193,41,114,209,8, //vpsrld $0x8,%xmm9,%xmm10
4433 196,99,125,25,203,1, //vextractf128 $0x1,%ymm9,%xmm3
4434 197,241,114,211,8, //vpsrld $0x8,%xmm3,%xmm1
4435 196,227,45,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm10,%ymm1
4436 197,164,84,201, //vandps %ymm1,%ymm11,%ymm1
4437 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004438 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05004439 196,193,41,114,209,16, //vpsrld $0x10,%xmm9,%xmm10
4440 197,233,114,211,16, //vpsrld $0x10,%xmm3,%xmm2
4441 196,227,45,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm10,%ymm2
4442 197,164,84,210, //vandps %ymm2,%ymm11,%ymm2
4443 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004444 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05004445 196,193,49,114,209,24, //vpsrld $0x18,%xmm9,%xmm9
4446 197,225,114,211,24, //vpsrld $0x18,%xmm3,%xmm3
4447 196,227,53,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
4448 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
4449 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
4450 72,173, //lods %ds:(%rsi),%rax
4451 255,224, //jmpq *%rax
4452 65,137,200, //mov %ecx,%r8d
4453 65,128,224,7, //and $0x7,%r8b
4454 196,65,52,87,201, //vxorps %ymm9,%ymm9,%ymm9
4455 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05004456 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07004457 15,135,80,255,255,255, //ja f2c <_sk_load_8888_avx+0x14>
4458 69,15,182,192, //movzbl %r8b,%r8d
4459 76,141,13,137,0,0,0, //lea 0x89(%rip),%r9 # 1070 <_sk_load_8888_avx+0x158>
Mike Klein894d5612017-03-07 07:59:52 -05004460 75,99,4,129, //movslq (%r9,%r8,4),%rax
4461 76,1,200, //add %r9,%rax
4462 255,224, //jmpq *%rax
4463 196,193,121,110,68,186,24, //vmovd 0x18(%r10,%rdi,4),%xmm0
4464 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
4465 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
4466 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
4467 196,99,117,12,200,64, //vblendps $0x40,%ymm0,%ymm1,%ymm9
4468 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
4469 196,195,121,34,68,186,20,1, //vpinsrd $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
4470 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
4471 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
4472 196,195,121,34,68,186,16,0, //vpinsrd $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
4473 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
4474 196,195,49,34,68,186,12,3, //vpinsrd $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
4475 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
4476 196,195,49,34,68,186,8,2, //vpinsrd $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
4477 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
4478 196,195,49,34,68,186,4,1, //vpinsrd $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
4479 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
4480 196,195,49,34,4,186,0, //vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
4481 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
Mike Klein64b97482017-03-14 17:35:04 -07004482 233,188,254,255,255, //jmpq f2c <_sk_load_8888_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05004483 238, //out %al,(%dx)
4484 255, //(bad)
4485 255, //(bad)
4486 255,224, //jmpq *%rax
4487 255, //(bad)
4488 255, //(bad)
4489 255,210, //callq *%rdx
4490 255, //(bad)
4491 255, //(bad)
4492 255,196, //inc %esp
4493 255, //(bad)
4494 255, //(bad)
4495 255,176,255,255,255,156, //pushq -0x63000001(%rax)
4496 255, //(bad)
4497 255, //(bad)
4498 255, //.byte 0xff
4499 128,255,255, //cmp $0xff,%bh
4500 255, //.byte 0xff
4501};
4502
4503CODE const uint8_t sk_store_8888_avx[] = {
4504 72,173, //lods %ds:(%rsi),%rax
4505 76,139,8, //mov (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004506 184,0,0,127,67, //mov $0x437f0000,%eax
4507 197,121,110,192, //vmovd %eax,%xmm8
4508 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
4509 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05004510 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
4511 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
4512 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
4513 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
4514 196,193,33,114,242,8, //vpslld $0x8,%xmm10,%xmm11
4515 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
4516 196,193,41,114,242,8, //vpslld $0x8,%xmm10,%xmm10
4517 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
4518 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
4519 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
4520 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
4521 196,193,33,114,242,16, //vpslld $0x10,%xmm10,%xmm11
4522 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
4523 196,193,41,114,242,16, //vpslld $0x10,%xmm10,%xmm10
4524 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
4525 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
4526 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
4527 196,193,33,114,240,24, //vpslld $0x18,%xmm8,%xmm11
4528 196,67,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm8
4529 196,193,57,114,240,24, //vpslld $0x18,%xmm8,%xmm8
4530 196,67,37,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm11,%ymm8
4531 196,65,45,86,192, //vorpd %ymm8,%ymm10,%ymm8
4532 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
4533 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004534 117,10, //jne 1130 <_sk_store_8888_avx+0xa4>
Mike Klein894d5612017-03-07 07:59:52 -05004535 196,65,124,17,4,185, //vmovups %ymm8,(%r9,%rdi,4)
4536 72,173, //lods %ds:(%rsi),%rax
4537 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07004538 65,137,200, //mov %ecx,%r8d
4539 65,128,224,7, //and $0x7,%r8b
4540 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05004541 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07004542 119,236, //ja 112c <_sk_store_8888_avx+0xa0>
4543 65,15,182,192, //movzbl %r8b,%eax
4544 76,141,5,85,0,0,0, //lea 0x55(%rip),%r8 # 11a0 <_sk_store_8888_avx+0x114>
4545 73,99,4,128, //movslq (%r8,%rax,4),%rax
4546 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05004547 255,224, //jmpq *%rax
4548 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4549 196,67,121,22,76,185,24,2, //vpextrd $0x2,%xmm9,0x18(%r9,%rdi,4)
4550 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4551 196,67,121,22,76,185,20,1, //vpextrd $0x1,%xmm9,0x14(%r9,%rdi,4)
4552 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07004553 196,65,122,17,76,185,16, //vmovss %xmm9,0x10(%r9,%rdi,4)
Mike Klein894d5612017-03-07 07:59:52 -05004554 196,67,121,22,68,185,12,3, //vpextrd $0x3,%xmm8,0xc(%r9,%rdi,4)
4555 196,67,121,22,68,185,8,2, //vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
4556 196,67,121,22,68,185,4,1, //vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
4557 196,65,121,126,4,185, //vmovd %xmm8,(%r9,%rdi,4)
Mike Klein64b97482017-03-14 17:35:04 -07004558 235,143, //jmp 112c <_sk_store_8888_avx+0xa0>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004559 15,31,0, //nopl (%rax)
4560 245, //cmc
Mike Klein894d5612017-03-07 07:59:52 -05004561 255, //(bad)
4562 255, //(bad)
4563 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004564 237, //in (%dx),%eax
Mike Klein894d5612017-03-07 07:59:52 -05004565 255, //(bad)
4566 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004567 255,229, //jmpq *%rbp
4568 255, //(bad)
4569 255, //(bad)
4570 255, //(bad)
4571 221,255, //(bad)
4572 255, //(bad)
4573 255,208, //callq *%rax
4574 255, //(bad)
4575 255, //(bad)
4576 255,194, //inc %edx
Mike Klein894d5612017-03-07 07:59:52 -05004577 255, //(bad)
4578 255, //(bad)
4579 255, //.byte 0xff
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004580 180,255, //mov $0xff,%ah
Mike Klein894d5612017-03-07 07:59:52 -05004581 255, //(bad)
4582 255, //.byte 0xff
4583};
4584
4585CODE const uint8_t sk_load_f16_avx[] = {
4586 72,173, //lods %ds:(%rsi),%rax
4587 72,139,0, //mov (%rax),%rax
4588 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004589 15,133,2,1,0,0, //jne 12cc <_sk_load_f16_avx+0x110>
4590 197,121,16,4,248, //vmovupd (%rax,%rdi,8),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05004591 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
4592 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07004593 197,122,111,76,248,48, //vmovdqu 0x30(%rax,%rdi,8),%xmm9
4594 197,185,97,194, //vpunpcklwd %xmm2,%xmm8,%xmm0
4595 197,185,105,210, //vpunpckhwd %xmm2,%xmm8,%xmm2
4596 196,193,97,97,201, //vpunpcklwd %xmm9,%xmm3,%xmm1
4597 196,193,97,105,217, //vpunpckhwd %xmm9,%xmm3,%xmm3
4598 197,121,97,194, //vpunpcklwd %xmm2,%xmm0,%xmm8
4599 197,249,105,194, //vpunpckhwd %xmm2,%xmm0,%xmm0
4600 197,241,97,211, //vpunpcklwd %xmm3,%xmm1,%xmm2
4601 197,113,105,203, //vpunpckhwd %xmm3,%xmm1,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05004602 184,0,4,0,4, //mov $0x4000400,%eax
4603 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05004604 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07004605 196,193,97,101,200, //vpcmpgtw %xmm8,%xmm3,%xmm1
4606 196,65,113,223,192, //vpandn %xmm8,%xmm1,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05004607 197,225,101,200, //vpcmpgtw %xmm0,%xmm3,%xmm1
4608 197,241,223,192, //vpandn %xmm0,%xmm1,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07004609 197,225,101,202, //vpcmpgtw %xmm2,%xmm3,%xmm1
4610 197,241,223,202, //vpandn %xmm2,%xmm1,%xmm1
4611 196,193,97,101,209, //vpcmpgtw %xmm9,%xmm3,%xmm2
4612 196,193,105,223,209, //vpandn %xmm9,%xmm2,%xmm2
4613 196,66,121,51,208, //vpmovzxwd %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05004614 196,98,121,51,201, //vpmovzxwd %xmm1,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07004615 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
4616 197,57,105,195, //vpunpckhwd %xmm3,%xmm8,%xmm8
4617 197,241,105,203, //vpunpckhwd %xmm3,%xmm1,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05004618 196,98,121,51,216, //vpmovzxwd %xmm0,%xmm11
Mike Klein64b97482017-03-14 17:35:04 -07004619 196,98,121,51,226, //vpmovzxwd %xmm2,%xmm12
4620 197,121,105,235, //vpunpckhwd %xmm3,%xmm0,%xmm13
4621 197,105,105,243, //vpunpckhwd %xmm3,%xmm2,%xmm14
4622 196,193,121,114,242,13, //vpslld $0xd,%xmm10,%xmm0
4623 196,193,105,114,241,13, //vpslld $0xd,%xmm9,%xmm2
4624 196,227,125,24,194,1, //vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05004625 184,0,0,128,119, //mov $0x77800000,%eax
Mike Klein64b97482017-03-14 17:35:04 -07004626 197,249,110,208, //vmovd %eax,%xmm2
4627 197,249,112,210,0, //vpshufd $0x0,%xmm2,%xmm2
4628 196,99,109,24,202,1, //vinsertf128 $0x1,%xmm2,%ymm2,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05004629 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
Mike Klein64b97482017-03-14 17:35:04 -07004630 196,193,105,114,240,13, //vpslld $0xd,%xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05004631 197,241,114,241,13, //vpslld $0xd,%xmm1,%xmm1
4632 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
4633 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
Mike Klein64b97482017-03-14 17:35:04 -07004634 196,193,105,114,243,13, //vpslld $0xd,%xmm11,%xmm2
4635 196,193,97,114,244,13, //vpslld $0xd,%xmm12,%xmm3
4636 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05004637 197,180,89,210, //vmulps %ymm2,%ymm9,%ymm2
4638 196,193,57,114,245,13, //vpslld $0xd,%xmm13,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07004639 196,193,97,114,246,13, //vpslld $0xd,%xmm14,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05004640 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
4641 197,180,89,219, //vmulps %ymm3,%ymm9,%ymm3
4642 72,173, //lods %ds:(%rsi),%rax
4643 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07004644 197,123,16,4,248, //vmovsd (%rax,%rdi,8),%xmm8
4645 196,65,49,239,201, //vpxor %xmm9,%xmm9,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05004646 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004647 116,79, //je 132b <_sk_load_f16_avx+0x16f>
4648 197,57,22,68,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05004649 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004650 114,67, //jb 132b <_sk_load_f16_avx+0x16f>
Mike Klein894d5612017-03-07 07:59:52 -05004651 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
4652 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004653 116,68, //je 1338 <_sk_load_f16_avx+0x17c>
Mike Klein894d5612017-03-07 07:59:52 -05004654 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
4655 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004656 114,56, //jb 1338 <_sk_load_f16_avx+0x17c>
Mike Klein894d5612017-03-07 07:59:52 -05004657 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
4658 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004659 15,132,209,254,255,255, //je 11e1 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004660 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
4661 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004662 15,130,193,254,255,255, //jb 11e1 <_sk_load_f16_avx+0x25>
4663 197,122,126,76,248,48, //vmovq 0x30(%rax,%rdi,8),%xmm9
4664 233,182,254,255,255, //jmpq 11e1 <_sk_load_f16_avx+0x25>
4665 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
4666 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
4667 233,169,254,255,255, //jmpq 11e1 <_sk_load_f16_avx+0x25>
4668 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
4669 233,160,254,255,255, //jmpq 11e1 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004670};
4671
4672CODE const uint8_t sk_store_f16_avx[] = {
4673 72,173, //lods %ds:(%rsi),%rax
Mike Klein5224f462017-03-07 17:29:54 -05004674 76,139,0, //mov (%rax),%r8
4675 184,0,0,128,7, //mov $0x7800000,%eax
4676 197,121,110,192, //vmovd %eax,%xmm8
4677 196,65,121,112,192,0, //vpshufd $0x0,%xmm8,%xmm8
4678 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05004679 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
4680 196,67,125,25,202,1, //vextractf128 $0x1,%ymm9,%xmm10
4681 196,193,41,114,210,13, //vpsrld $0xd,%xmm10,%xmm10
4682 196,193,49,114,209,13, //vpsrld $0xd,%xmm9,%xmm9
4683 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
4684 196,67,125,25,220,1, //vextractf128 $0x1,%ymm11,%xmm12
4685 196,193,25,114,212,13, //vpsrld $0xd,%xmm12,%xmm12
4686 196,193,33,114,211,13, //vpsrld $0xd,%xmm11,%xmm11
4687 197,60,89,234, //vmulps %ymm2,%ymm8,%ymm13
4688 196,67,125,25,238,1, //vextractf128 $0x1,%ymm13,%xmm14
4689 196,193,9,114,214,13, //vpsrld $0xd,%xmm14,%xmm14
4690 196,193,17,114,213,13, //vpsrld $0xd,%xmm13,%xmm13
4691 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
4692 196,67,125,25,199,1, //vextractf128 $0x1,%ymm8,%xmm15
4693 196,193,1,114,215,13, //vpsrld $0xd,%xmm15,%xmm15
4694 196,193,57,114,208,13, //vpsrld $0xd,%xmm8,%xmm8
4695 196,193,33,115,251,2, //vpslldq $0x2,%xmm11,%xmm11
4696 196,65,33,235,201, //vpor %xmm9,%xmm11,%xmm9
4697 196,193,33,115,252,2, //vpslldq $0x2,%xmm12,%xmm11
4698 196,65,33,235,226, //vpor %xmm10,%xmm11,%xmm12
4699 196,193,57,115,248,2, //vpslldq $0x2,%xmm8,%xmm8
4700 196,65,57,235,197, //vpor %xmm13,%xmm8,%xmm8
4701 196,193,41,115,255,2, //vpslldq $0x2,%xmm15,%xmm10
4702 196,65,41,235,238, //vpor %xmm14,%xmm10,%xmm13
4703 196,65,49,98,216, //vpunpckldq %xmm8,%xmm9,%xmm11
4704 196,65,49,106,208, //vpunpckhdq %xmm8,%xmm9,%xmm10
4705 196,65,25,98,205, //vpunpckldq %xmm13,%xmm12,%xmm9
4706 196,65,25,106,197, //vpunpckhdq %xmm13,%xmm12,%xmm8
4707 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004708 117,31, //jne 1417 <_sk_store_f16_avx+0xd6>
Mike Klein5224f462017-03-07 17:29:54 -05004709 196,65,120,17,28,248, //vmovups %xmm11,(%r8,%rdi,8)
4710 196,65,120,17,84,248,16, //vmovups %xmm10,0x10(%r8,%rdi,8)
4711 196,65,120,17,76,248,32, //vmovups %xmm9,0x20(%r8,%rdi,8)
4712 196,65,122,127,68,248,48, //vmovdqu %xmm8,0x30(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -05004713 72,173, //lods %ds:(%rsi),%rax
4714 255,224, //jmpq *%rax
Mike Klein5224f462017-03-07 17:29:54 -05004715 196,65,121,214,28,248, //vmovq %xmm11,(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -05004716 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004717 116,240, //je 1413 <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -05004718 196,65,121,23,92,248,8, //vmovhpd %xmm11,0x8(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -05004719 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004720 114,227, //jb 1413 <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -05004721 196,65,121,214,84,248,16, //vmovq %xmm10,0x10(%r8,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07004722 116,218, //je 1413 <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -05004723 196,65,121,23,84,248,24, //vmovhpd %xmm10,0x18(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -05004724 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004725 114,205, //jb 1413 <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -05004726 196,65,121,214,76,248,32, //vmovq %xmm9,0x20(%r8,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07004727 116,196, //je 1413 <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -05004728 196,65,121,23,76,248,40, //vmovhpd %xmm9,0x28(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -05004729 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004730 114,183, //jb 1413 <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -05004731 196,65,121,214,68,248,48, //vmovq %xmm8,0x30(%r8,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07004732 235,174, //jmp 1413 <_sk_store_f16_avx+0xd2>
Mike Klein894d5612017-03-07 07:59:52 -05004733};
4734
4735CODE const uint8_t sk_store_f32_avx[] = {
4736 72,173, //lods %ds:(%rsi),%rax
4737 76,139,0, //mov (%rax),%r8
4738 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
4739 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
4740 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
4741 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
4742 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
4743 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
4744 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
4745 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
4746 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
4747 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004748 117,55, //jne 14d2 <_sk_store_f32_avx+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05004749 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
4750 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
4751 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
4752 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
4753 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
4754 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
4755 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
4756 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
4757 72,173, //lods %ds:(%rsi),%rax
4758 255,224, //jmpq *%rax
4759 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
4760 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004761 116,240, //je 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004762 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
4763 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004764 114,227, //jb 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004765 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07004766 116,218, //je 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004767 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
4768 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004769 114,205, //jb 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004770 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07004771 116,195, //je 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004772 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
4773 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07004774 114,181, //jb 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004775 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07004776 235,171, //jmp 14ce <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004777};
4778
4779CODE const uint8_t sk_clamp_x_avx[] = {
4780 72,173, //lods %ds:(%rsi),%rax
4781 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4782 197,60,95,200, //vmaxps %ymm0,%ymm8,%ymm9
4783 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4784 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
4785 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4786 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
4787 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4788 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
4789 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
4790 72,173, //lods %ds:(%rsi),%rax
4791 255,224, //jmpq *%rax
4792};
4793
4794CODE const uint8_t sk_clamp_y_avx[] = {
4795 72,173, //lods %ds:(%rsi),%rax
4796 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4797 197,60,95,201, //vmaxps %ymm1,%ymm8,%ymm9
4798 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4799 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
4800 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4801 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
4802 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4803 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
4804 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
4805 72,173, //lods %ds:(%rsi),%rax
4806 255,224, //jmpq *%rax
4807};
4808
4809CODE const uint8_t sk_repeat_x_avx[] = {
4810 72,173, //lods %ds:(%rsi),%rax
4811 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4812 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
4813 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
4814 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
4815 196,65,124,92,201, //vsubps %ymm9,%ymm0,%ymm9
4816 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
4817 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4818 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
4819 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4820 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
4821 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
4822 72,173, //lods %ds:(%rsi),%rax
4823 255,224, //jmpq *%rax
4824};
4825
4826CODE const uint8_t sk_repeat_y_avx[] = {
4827 72,173, //lods %ds:(%rsi),%rax
4828 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4829 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
4830 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
4831 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
4832 196,65,116,92,201, //vsubps %ymm9,%ymm1,%ymm9
4833 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
4834 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4835 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
4836 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4837 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
4838 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
4839 72,173, //lods %ds:(%rsi),%rax
4840 255,224, //jmpq *%rax
4841};
4842
4843CODE const uint8_t sk_mirror_x_avx[] = {
4844 72,173, //lods %ds:(%rsi),%rax
Mike Klein64b97482017-03-14 17:35:04 -07004845 197,121,110,0, //vmovd (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05004846 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
4847 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
4848 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
4849 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
4850 196,227,121,4,192,0, //vpermilps $0x0,%xmm0,%xmm0
4851 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
4852 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
4853 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
4854 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
4855 197,172,92,192, //vsubps %ymm0,%ymm10,%ymm0
4856 196,193,124,92,193, //vsubps %ymm9,%ymm0,%ymm0
4857 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4858 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
4859 197,60,84,192, //vandps %ymm0,%ymm8,%ymm8
4860 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
4861 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4862 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
4863 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
4864 196,227,53,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
4865 197,188,93,192, //vminps %ymm0,%ymm8,%ymm0
4866 72,173, //lods %ds:(%rsi),%rax
4867 255,224, //jmpq *%rax
4868};
4869
4870CODE const uint8_t sk_mirror_y_avx[] = {
4871 72,173, //lods %ds:(%rsi),%rax
Mike Klein64b97482017-03-14 17:35:04 -07004872 197,121,110,0, //vmovd (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05004873 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
4874 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
4875 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
4876 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
4877 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4878 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
4879 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
4880 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
4881 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
4882 197,172,92,201, //vsubps %ymm1,%ymm10,%ymm1
4883 196,193,116,92,201, //vsubps %ymm9,%ymm1,%ymm1
4884 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4885 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
4886 197,60,84,193, //vandps %ymm1,%ymm8,%ymm8
4887 196,99,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm1
4888 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4889 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
4890 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
4891 196,227,53,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm9,%ymm1
4892 197,188,93,201, //vminps %ymm1,%ymm8,%ymm1
4893 72,173, //lods %ds:(%rsi),%rax
4894 255,224, //jmpq *%rax
4895};
4896
Mike Kleine9ed07d2017-03-07 12:28:11 -05004897CODE const uint8_t sk_luminance_to_alpha_avx[] = {
Mike Klein5224f462017-03-07 17:29:54 -05004898 184,208,179,89,62, //mov $0x3e59b3d0,%eax
4899 197,249,110,216, //vmovd %eax,%xmm3
4900 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
4901 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05004902 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05004903 184,89,23,55,63, //mov $0x3f371759,%eax
4904 197,249,110,216, //vmovd %eax,%xmm3
4905 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
4906 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05004907 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
4908 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05004909 184,152,221,147,61, //mov $0x3d93dd98,%eax
4910 197,249,110,200, //vmovd %eax,%xmm1
4911 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4912 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Kleine9ed07d2017-03-07 12:28:11 -05004913 197,244,89,202, //vmulps %ymm2,%ymm1,%ymm1
4914 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
4915 72,173, //lods %ds:(%rsi),%rax
4916 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
4917 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
4918 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
4919 255,224, //jmpq *%rax
4920};
4921
Mike Klein894d5612017-03-07 07:59:52 -05004922CODE const uint8_t sk_matrix_2x3_avx[] = {
4923 72,173, //lods %ds:(%rsi),%rax
4924 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4925 196,98,125,24,72,8, //vbroadcastss 0x8(%rax),%ymm9
4926 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
4927 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4928 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4929 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4930 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4931 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4932 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
4933 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
4934 197,172,89,201, //vmulps %ymm1,%ymm10,%ymm1
4935 196,193,116,88,203, //vaddps %ymm11,%ymm1,%ymm1
4936 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
4937 197,252,88,201, //vaddps %ymm1,%ymm0,%ymm1
4938 72,173, //lods %ds:(%rsi),%rax
4939 197,124,41,192, //vmovaps %ymm8,%ymm0
4940 255,224, //jmpq *%rax
4941};
4942
4943CODE const uint8_t sk_matrix_3x4_avx[] = {
4944 72,173, //lods %ds:(%rsi),%rax
4945 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4946 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
4947 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
4948 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
4949 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
4950 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4951 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4952 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4953 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4954 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4955 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4956 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
4957 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
4958 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
4959 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
4960 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
4961 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
4962 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4963 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
4964 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4965 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
4966 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
4967 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
4968 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
4969 197,156,89,210, //vmulps %ymm2,%ymm12,%ymm2
4970 196,193,108,88,213, //vaddps %ymm13,%ymm2,%ymm2
4971 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
4972 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
4973 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
4974 197,252,88,209, //vaddps %ymm1,%ymm0,%ymm2
4975 72,173, //lods %ds:(%rsi),%rax
4976 197,124,41,192, //vmovaps %ymm8,%ymm0
4977 197,124,41,201, //vmovaps %ymm9,%ymm1
4978 255,224, //jmpq *%rax
4979};
4980
Mike Kleine9ed07d2017-03-07 12:28:11 -05004981CODE const uint8_t sk_matrix_4x5_avx[] = {
4982 72,173, //lods %ds:(%rsi),%rax
4983 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4984 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
4985 196,98,125,24,80,32, //vbroadcastss 0x20(%rax),%ymm10
4986 196,98,125,24,88,48, //vbroadcastss 0x30(%rax),%ymm11
4987 196,98,125,24,96,64, //vbroadcastss 0x40(%rax),%ymm12
4988 197,36,89,219, //vmulps %ymm3,%ymm11,%ymm11
4989 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
4990 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
4991 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4992 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4993 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4994 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4995 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4996 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4997 196,98,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm10
4998 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
4999 196,98,125,24,96,52, //vbroadcastss 0x34(%rax),%ymm12
5000 196,98,125,24,104,68, //vbroadcastss 0x44(%rax),%ymm13
5001 197,28,89,227, //vmulps %ymm3,%ymm12,%ymm12
5002 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
5003 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
5004 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
5005 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
5006 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
5007 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
5008 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
5009 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
5010 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
5011 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
5012 196,98,125,24,104,56, //vbroadcastss 0x38(%rax),%ymm13
5013 196,98,125,24,112,72, //vbroadcastss 0x48(%rax),%ymm14
5014 197,20,89,235, //vmulps %ymm3,%ymm13,%ymm13
5015 196,65,20,88,238, //vaddps %ymm14,%ymm13,%ymm13
5016 197,28,89,226, //vmulps %ymm2,%ymm12,%ymm12
5017 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
5018 197,36,89,217, //vmulps %ymm1,%ymm11,%ymm11
5019 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
5020 197,44,89,208, //vmulps %ymm0,%ymm10,%ymm10
5021 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
5022 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
5023 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
5024 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
5025 196,98,125,24,112,60, //vbroadcastss 0x3c(%rax),%ymm14
5026 196,98,125,24,120,76, //vbroadcastss 0x4c(%rax),%ymm15
5027 197,140,89,219, //vmulps %ymm3,%ymm14,%ymm3
5028 196,193,100,88,223, //vaddps %ymm15,%ymm3,%ymm3
5029 197,148,89,210, //vmulps %ymm2,%ymm13,%ymm2
5030 197,236,88,211, //vaddps %ymm3,%ymm2,%ymm2
5031 197,156,89,201, //vmulps %ymm1,%ymm12,%ymm1
5032 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
5033 197,164,89,192, //vmulps %ymm0,%ymm11,%ymm0
5034 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
5035 72,173, //lods %ds:(%rsi),%rax
5036 197,124,41,192, //vmovaps %ymm8,%ymm0
5037 197,124,41,201, //vmovaps %ymm9,%ymm1
5038 197,124,41,210, //vmovaps %ymm10,%ymm2
5039 255,224, //jmpq *%rax
5040};
5041
Mike Klein894d5612017-03-07 07:59:52 -05005042CODE const uint8_t sk_matrix_perspective_avx[] = {
5043 72,173, //lods %ds:(%rsi),%rax
5044 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
5045 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
5046 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
5047 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
5048 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
5049 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
5050 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
5051 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
5052 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
5053 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
5054 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
5055 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
5056 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
5057 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
5058 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
5059 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
5060 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
5061 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
5062 196,193,116,88,204, //vaddps %ymm12,%ymm1,%ymm1
5063 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
5064 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
5065 197,252,83,200, //vrcpps %ymm0,%ymm1
5066 197,188,89,193, //vmulps %ymm1,%ymm8,%ymm0
5067 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
5068 72,173, //lods %ds:(%rsi),%rax
5069 255,224, //jmpq *%rax
5070};
5071
5072CODE const uint8_t sk_linear_gradient_2stops_avx[] = {
5073 72,173, //lods %ds:(%rsi),%rax
5074 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
5075 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
5076 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
5077 197,108,88,193, //vaddps %ymm1,%ymm2,%ymm8
5078 196,226,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm1
5079 196,226,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm2
5080 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
5081 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
5082 196,226,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm2
5083 196,226,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm3
5084 197,236,89,208, //vmulps %ymm0,%ymm2,%ymm2
5085 197,228,88,210, //vaddps %ymm2,%ymm3,%ymm2
5086 196,226,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm3
5087 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
5088 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
5089 197,180,88,216, //vaddps %ymm0,%ymm9,%ymm3
5090 72,173, //lods %ds:(%rsi),%rax
5091 197,124,41,192, //vmovaps %ymm8,%ymm0
5092 255,224, //jmpq *%rax
5093};
5094
5095CODE const uint8_t sk_start_pipeline_sse41[] = {
5096 65,87, //push %r15
5097 65,86, //push %r14
5098 65,85, //push %r13
5099 65,84, //push %r12
5100 83, //push %rbx
5101 73,137,207, //mov %rcx,%r15
5102 73,137,214, //mov %rdx,%r14
5103 72,137,251, //mov %rdi,%rbx
5104 72,173, //lods %ds:(%rsi),%rax
5105 73,137,196, //mov %rax,%r12
5106 73,137,245, //mov %rsi,%r13
5107 72,141,67,4, //lea 0x4(%rbx),%rax
5108 76,57,248, //cmp %r15,%rax
5109 118,5, //jbe 28 <_sk_start_pipeline_sse41+0x28>
5110 72,137,216, //mov %rbx,%rax
5111 235,52, //jmp 5c <_sk_start_pipeline_sse41+0x5c>
5112 15,87,192, //xorps %xmm0,%xmm0
5113 15,87,201, //xorps %xmm1,%xmm1
5114 15,87,210, //xorps %xmm2,%xmm2
5115 15,87,219, //xorps %xmm3,%xmm3
5116 15,87,228, //xorps %xmm4,%xmm4
5117 15,87,237, //xorps %xmm5,%xmm5
5118 15,87,246, //xorps %xmm6,%xmm6
5119 15,87,255, //xorps %xmm7,%xmm7
5120 72,137,223, //mov %rbx,%rdi
5121 76,137,238, //mov %r13,%rsi
5122 76,137,242, //mov %r14,%rdx
5123 65,255,212, //callq *%r12
5124 72,141,67,4, //lea 0x4(%rbx),%rax
5125 72,131,195,8, //add $0x8,%rbx
5126 76,57,251, //cmp %r15,%rbx
5127 72,137,195, //mov %rax,%rbx
5128 118,204, //jbe 28 <_sk_start_pipeline_sse41+0x28>
5129 91, //pop %rbx
5130 65,92, //pop %r12
5131 65,93, //pop %r13
5132 65,94, //pop %r14
5133 65,95, //pop %r15
5134 195, //retq
5135};
5136
5137CODE const uint8_t sk_just_return_sse41[] = {
5138 195, //retq
5139};
5140
5141CODE const uint8_t sk_seed_shader_sse41[] = {
5142 72,173, //lods %ds:(%rsi),%rax
5143 102,15,110,199, //movd %edi,%xmm0
5144 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
5145 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005146 185,0,0,0,63, //mov $0x3f000000,%ecx
5147 102,15,110,209, //movd %ecx,%xmm2
5148 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5149 15,88,202, //addps %xmm2,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05005150 15,16,2, //movups (%rdx),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005151 15,88,193, //addps %xmm1,%xmm0
5152 102,15,110,8, //movd (%rax),%xmm1
5153 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
5154 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005155 15,88,202, //addps %xmm2,%xmm1
5156 184,0,0,128,63, //mov $0x3f800000,%eax
5157 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005158 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5159 72,173, //lods %ds:(%rsi),%rax
5160 15,87,219, //xorps %xmm3,%xmm3
5161 15,87,228, //xorps %xmm4,%xmm4
5162 15,87,237, //xorps %xmm5,%xmm5
5163 15,87,246, //xorps %xmm6,%xmm6
5164 15,87,255, //xorps %xmm7,%xmm7
5165 255,224, //jmpq *%rax
5166};
5167
5168CODE const uint8_t sk_constant_color_sse41[] = {
5169 72,173, //lods %ds:(%rsi),%rax
5170 15,16,24, //movups (%rax),%xmm3
5171 15,40,195, //movaps %xmm3,%xmm0
5172 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5173 15,40,203, //movaps %xmm3,%xmm1
5174 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
5175 15,40,211, //movaps %xmm3,%xmm2
5176 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
5177 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
5178 72,173, //lods %ds:(%rsi),%rax
5179 255,224, //jmpq *%rax
5180};
5181
5182CODE const uint8_t sk_clear_sse41[] = {
5183 72,173, //lods %ds:(%rsi),%rax
5184 15,87,192, //xorps %xmm0,%xmm0
5185 15,87,201, //xorps %xmm1,%xmm1
5186 15,87,210, //xorps %xmm2,%xmm2
5187 15,87,219, //xorps %xmm3,%xmm3
5188 255,224, //jmpq *%rax
5189};
5190
5191CODE const uint8_t sk_plus__sse41[] = {
5192 15,88,196, //addps %xmm4,%xmm0
5193 15,88,205, //addps %xmm5,%xmm1
5194 15,88,214, //addps %xmm6,%xmm2
5195 15,88,223, //addps %xmm7,%xmm3
5196 72,173, //lods %ds:(%rsi),%rax
5197 255,224, //jmpq *%rax
5198};
5199
5200CODE const uint8_t sk_srcover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005201 184,0,0,128,63, //mov $0x3f800000,%eax
5202 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005203 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5204 68,15,92,195, //subps %xmm3,%xmm8
5205 69,15,40,200, //movaps %xmm8,%xmm9
5206 68,15,89,204, //mulps %xmm4,%xmm9
5207 65,15,88,193, //addps %xmm9,%xmm0
5208 69,15,40,200, //movaps %xmm8,%xmm9
5209 68,15,89,205, //mulps %xmm5,%xmm9
5210 65,15,88,201, //addps %xmm9,%xmm1
5211 69,15,40,200, //movaps %xmm8,%xmm9
5212 68,15,89,206, //mulps %xmm6,%xmm9
5213 65,15,88,209, //addps %xmm9,%xmm2
5214 68,15,89,199, //mulps %xmm7,%xmm8
5215 65,15,88,216, //addps %xmm8,%xmm3
5216 72,173, //lods %ds:(%rsi),%rax
5217 255,224, //jmpq *%rax
5218};
5219
5220CODE const uint8_t sk_dstover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005221 184,0,0,128,63, //mov $0x3f800000,%eax
5222 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005223 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5224 68,15,92,199, //subps %xmm7,%xmm8
5225 65,15,89,192, //mulps %xmm8,%xmm0
5226 15,88,196, //addps %xmm4,%xmm0
5227 65,15,89,200, //mulps %xmm8,%xmm1
5228 15,88,205, //addps %xmm5,%xmm1
5229 65,15,89,208, //mulps %xmm8,%xmm2
5230 15,88,214, //addps %xmm6,%xmm2
5231 65,15,89,216, //mulps %xmm8,%xmm3
5232 15,88,223, //addps %xmm7,%xmm3
5233 72,173, //lods %ds:(%rsi),%rax
5234 255,224, //jmpq *%rax
5235};
5236
5237CODE const uint8_t sk_clamp_0_sse41[] = {
5238 69,15,87,192, //xorps %xmm8,%xmm8
5239 65,15,95,192, //maxps %xmm8,%xmm0
5240 65,15,95,200, //maxps %xmm8,%xmm1
5241 65,15,95,208, //maxps %xmm8,%xmm2
5242 65,15,95,216, //maxps %xmm8,%xmm3
5243 72,173, //lods %ds:(%rsi),%rax
5244 255,224, //jmpq *%rax
5245};
5246
5247CODE const uint8_t sk_clamp_1_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005248 184,0,0,128,63, //mov $0x3f800000,%eax
5249 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005250 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5251 65,15,93,192, //minps %xmm8,%xmm0
5252 65,15,93,200, //minps %xmm8,%xmm1
5253 65,15,93,208, //minps %xmm8,%xmm2
5254 65,15,93,216, //minps %xmm8,%xmm3
5255 72,173, //lods %ds:(%rsi),%rax
5256 255,224, //jmpq *%rax
5257};
5258
5259CODE const uint8_t sk_clamp_a_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005260 184,0,0,128,63, //mov $0x3f800000,%eax
5261 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005262 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5263 65,15,93,216, //minps %xmm8,%xmm3
5264 15,93,195, //minps %xmm3,%xmm0
5265 15,93,203, //minps %xmm3,%xmm1
5266 15,93,211, //minps %xmm3,%xmm2
5267 72,173, //lods %ds:(%rsi),%rax
5268 255,224, //jmpq *%rax
5269};
5270
5271CODE const uint8_t sk_set_rgb_sse41[] = {
5272 72,173, //lods %ds:(%rsi),%rax
5273 243,15,16,0, //movss (%rax),%xmm0
5274 243,15,16,72,4, //movss 0x4(%rax),%xmm1
5275 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5276 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5277 243,15,16,80,8, //movss 0x8(%rax),%xmm2
5278 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5279 72,173, //lods %ds:(%rsi),%rax
5280 255,224, //jmpq *%rax
5281};
5282
5283CODE const uint8_t sk_swap_rb_sse41[] = {
5284 68,15,40,192, //movaps %xmm0,%xmm8
5285 72,173, //lods %ds:(%rsi),%rax
5286 15,40,194, //movaps %xmm2,%xmm0
5287 65,15,40,208, //movaps %xmm8,%xmm2
5288 255,224, //jmpq *%rax
5289};
5290
5291CODE const uint8_t sk_swap_sse41[] = {
5292 68,15,40,195, //movaps %xmm3,%xmm8
5293 68,15,40,202, //movaps %xmm2,%xmm9
5294 68,15,40,209, //movaps %xmm1,%xmm10
5295 68,15,40,216, //movaps %xmm0,%xmm11
5296 72,173, //lods %ds:(%rsi),%rax
5297 15,40,196, //movaps %xmm4,%xmm0
5298 15,40,205, //movaps %xmm5,%xmm1
5299 15,40,214, //movaps %xmm6,%xmm2
5300 15,40,223, //movaps %xmm7,%xmm3
5301 65,15,40,227, //movaps %xmm11,%xmm4
5302 65,15,40,234, //movaps %xmm10,%xmm5
5303 65,15,40,241, //movaps %xmm9,%xmm6
5304 65,15,40,248, //movaps %xmm8,%xmm7
5305 255,224, //jmpq *%rax
5306};
5307
5308CODE const uint8_t sk_move_src_dst_sse41[] = {
5309 72,173, //lods %ds:(%rsi),%rax
5310 15,40,224, //movaps %xmm0,%xmm4
5311 15,40,233, //movaps %xmm1,%xmm5
5312 15,40,242, //movaps %xmm2,%xmm6
5313 15,40,251, //movaps %xmm3,%xmm7
5314 255,224, //jmpq *%rax
5315};
5316
5317CODE const uint8_t sk_move_dst_src_sse41[] = {
5318 72,173, //lods %ds:(%rsi),%rax
5319 15,40,196, //movaps %xmm4,%xmm0
5320 15,40,205, //movaps %xmm5,%xmm1
5321 15,40,214, //movaps %xmm6,%xmm2
5322 15,40,223, //movaps %xmm7,%xmm3
5323 255,224, //jmpq *%rax
5324};
5325
5326CODE const uint8_t sk_premul_sse41[] = {
5327 15,89,195, //mulps %xmm3,%xmm0
5328 15,89,203, //mulps %xmm3,%xmm1
5329 15,89,211, //mulps %xmm3,%xmm2
5330 72,173, //lods %ds:(%rsi),%rax
5331 255,224, //jmpq *%rax
5332};
5333
5334CODE const uint8_t sk_unpremul_sse41[] = {
Mike Klein64b97482017-03-14 17:35:04 -07005335 69,15,87,192, //xorps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005336 184,0,0,128,63, //mov $0x3f800000,%eax
Mike Klein64b97482017-03-14 17:35:04 -07005337 102,68,15,110,200, //movd %eax,%xmm9
5338 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5339 68,15,94,203, //divps %xmm3,%xmm9
5340 68,15,194,195,4, //cmpneqps %xmm3,%xmm8
5341 69,15,84,193, //andps %xmm9,%xmm8
5342 65,15,89,192, //mulps %xmm8,%xmm0
5343 65,15,89,200, //mulps %xmm8,%xmm1
5344 65,15,89,208, //mulps %xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005345 72,173, //lods %ds:(%rsi),%rax
Mike Klein894d5612017-03-07 07:59:52 -05005346 255,224, //jmpq *%rax
5347};
5348
5349CODE const uint8_t sk_from_srgb_sse41[] = {
Mike Klein5224f462017-03-07 17:29:54 -05005350 184,145,131,158,61, //mov $0x3d9e8391,%eax
5351 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05005352 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5353 69,15,40,211, //movaps %xmm11,%xmm10
5354 68,15,89,208, //mulps %xmm0,%xmm10
5355 68,15,40,240, //movaps %xmm0,%xmm14
5356 69,15,89,246, //mulps %xmm14,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -05005357 184,154,153,153,62, //mov $0x3e99999a,%eax
5358 102,68,15,110,192, //movd %eax,%xmm8
5359 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5360 184,92,143,50,63, //mov $0x3f328f5c,%eax
5361 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05005362 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -05005363 69,15,40,200, //movaps %xmm8,%xmm9
5364 68,15,89,200, //mulps %xmm0,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05005365 69,15,88,204, //addps %xmm12,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05005366 184,10,215,35,59, //mov $0x3b23d70a,%eax
5367 102,68,15,110,232, //movd %eax,%xmm13
5368 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5369 69,15,89,206, //mulps %xmm14,%xmm9
5370 69,15,88,205, //addps %xmm13,%xmm9
5371 184,174,71,97,61, //mov $0x3d6147ae,%eax
5372 102,68,15,110,240, //movd %eax,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -05005373 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
5374 65,15,194,198,1, //cmpltps %xmm14,%xmm0
5375 102,69,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm9
5376 69,15,40,251, //movaps %xmm11,%xmm15
5377 68,15,89,249, //mulps %xmm1,%xmm15
5378 15,40,193, //movaps %xmm1,%xmm0
5379 15,89,192, //mulps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005380 69,15,40,208, //movaps %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005381 68,15,89,209, //mulps %xmm1,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005382 69,15,88,212, //addps %xmm12,%xmm10
Mike Klein5224f462017-03-07 17:29:54 -05005383 68,15,89,208, //mulps %xmm0,%xmm10
5384 69,15,88,213, //addps %xmm13,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005385 65,15,194,206,1, //cmpltps %xmm14,%xmm1
5386 15,40,193, //movaps %xmm1,%xmm0
5387 102,69,15,56,20,215, //blendvps %xmm0,%xmm15,%xmm10
Mike Klein5224f462017-03-07 17:29:54 -05005388 68,15,89,218, //mulps %xmm2,%xmm11
5389 15,40,194, //movaps %xmm2,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005390 15,89,192, //mulps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005391 68,15,89,194, //mulps %xmm2,%xmm8
5392 69,15,88,196, //addps %xmm12,%xmm8
5393 68,15,89,192, //mulps %xmm0,%xmm8
5394 69,15,88,197, //addps %xmm13,%xmm8
5395 65,15,194,214,1, //cmpltps %xmm14,%xmm2
5396 15,40,194, //movaps %xmm2,%xmm0
5397 102,69,15,56,20,195, //blendvps %xmm0,%xmm11,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005398 72,173, //lods %ds:(%rsi),%rax
5399 65,15,40,193, //movaps %xmm9,%xmm0
5400 65,15,40,202, //movaps %xmm10,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05005401 65,15,40,208, //movaps %xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005402 255,224, //jmpq *%rax
5403};
5404
5405CODE const uint8_t sk_to_srgb_sse41[] = {
5406 72,131,236,24, //sub $0x18,%rsp
5407 15,41,60,36, //movaps %xmm7,(%rsp)
5408 15,40,254, //movaps %xmm6,%xmm7
5409 15,40,245, //movaps %xmm5,%xmm6
5410 15,40,236, //movaps %xmm4,%xmm5
5411 15,40,227, //movaps %xmm3,%xmm4
Mike Klein5224f462017-03-07 17:29:54 -05005412 15,40,218, //movaps %xmm2,%xmm3
5413 15,40,209, //movaps %xmm1,%xmm2
5414 68,15,82,192, //rsqrtps %xmm0,%xmm8
5415 69,15,83,200, //rcpps %xmm8,%xmm9
5416 69,15,82,248, //rsqrtps %xmm8,%xmm15
5417 184,41,92,71,65, //mov $0x41475c29,%eax
5418 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05005419 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
Mike Klein5224f462017-03-07 17:29:54 -05005420 69,15,40,211, //movaps %xmm11,%xmm10
5421 68,15,89,208, //mulps %xmm0,%xmm10
5422 184,0,0,128,63, //mov $0x3f800000,%eax
5423 102,68,15,110,192, //movd %eax,%xmm8
5424 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5425 184,194,135,210,62, //mov $0x3ed287c2,%eax
5426 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05005427 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -05005428 184,206,111,48,63, //mov $0x3f306fce,%eax
5429 102,68,15,110,232, //movd %eax,%xmm13
Mike Klein894d5612017-03-07 07:59:52 -05005430 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
Mike Klein5224f462017-03-07 17:29:54 -05005431 184,168,87,202,61, //mov $0x3dca57a8,%eax
5432 53,0,0,0,128, //xor $0x80000000,%eax
5433 102,68,15,110,240, //movd %eax,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -05005434 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
5435 69,15,89,205, //mulps %xmm13,%xmm9
5436 69,15,88,206, //addps %xmm14,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05005437 69,15,89,252, //mulps %xmm12,%xmm15
5438 69,15,88,249, //addps %xmm9,%xmm15
5439 69,15,40,200, //movaps %xmm8,%xmm9
5440 69,15,93,207, //minps %xmm15,%xmm9
5441 184,4,231,140,59, //mov $0x3b8ce704,%eax
5442 102,68,15,110,248, //movd %eax,%xmm15
Mike Klein894d5612017-03-07 07:59:52 -05005443 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
5444 65,15,194,199,1, //cmpltps %xmm15,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005445 102,69,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm9
5446 68,15,82,210, //rsqrtps %xmm2,%xmm10
5447 65,15,83,194, //rcpps %xmm10,%xmm0
5448 69,15,82,210, //rsqrtps %xmm10,%xmm10
5449 65,15,89,197, //mulps %xmm13,%xmm0
5450 65,15,88,198, //addps %xmm14,%xmm0
5451 69,15,89,212, //mulps %xmm12,%xmm10
5452 68,15,88,208, //addps %xmm0,%xmm10
5453 65,15,40,200, //movaps %xmm8,%xmm1
5454 65,15,93,202, //minps %xmm10,%xmm1
5455 69,15,40,211, //movaps %xmm11,%xmm10
5456 68,15,89,210, //mulps %xmm2,%xmm10
5457 65,15,194,215,1, //cmpltps %xmm15,%xmm2
5458 15,40,194, //movaps %xmm2,%xmm0
5459 102,65,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05005460 15,82,195, //rsqrtps %xmm3,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005461 15,83,208, //rcpps %xmm0,%xmm2
5462 65,15,89,213, //mulps %xmm13,%xmm2
5463 65,15,88,214, //addps %xmm14,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005464 15,82,192, //rsqrtps %xmm0,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005465 65,15,89,196, //mulps %xmm12,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005466 15,88,194, //addps %xmm2,%xmm0
5467 68,15,93,192, //minps %xmm0,%xmm8
5468 68,15,89,219, //mulps %xmm3,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05005469 65,15,194,223,1, //cmpltps %xmm15,%xmm3
5470 15,40,195, //movaps %xmm3,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005471 102,69,15,56,20,195, //blendvps %xmm0,%xmm11,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005472 72,173, //lods %ds:(%rsi),%rax
5473 65,15,40,193, //movaps %xmm9,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005474 65,15,40,208, //movaps %xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005475 15,40,220, //movaps %xmm4,%xmm3
5476 15,40,229, //movaps %xmm5,%xmm4
5477 15,40,238, //movaps %xmm6,%xmm5
5478 15,40,247, //movaps %xmm7,%xmm6
5479 15,40,60,36, //movaps (%rsp),%xmm7
5480 72,131,196,24, //add $0x18,%rsp
5481 255,224, //jmpq *%rax
5482};
5483
5484CODE const uint8_t sk_scale_1_float_sse41[] = {
5485 72,173, //lods %ds:(%rsi),%rax
5486 243,68,15,16,0, //movss (%rax),%xmm8
5487 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5488 65,15,89,192, //mulps %xmm8,%xmm0
5489 65,15,89,200, //mulps %xmm8,%xmm1
5490 65,15,89,208, //mulps %xmm8,%xmm2
5491 65,15,89,216, //mulps %xmm8,%xmm3
5492 72,173, //lods %ds:(%rsi),%rax
5493 255,224, //jmpq *%rax
5494};
5495
5496CODE const uint8_t sk_scale_u8_sse41[] = {
5497 72,173, //lods %ds:(%rsi),%rax
5498 72,139,0, //mov (%rax),%rax
5499 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
5500 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005501 184,129,128,128,59, //mov $0x3b808081,%eax
5502 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05005503 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5504 69,15,89,200, //mulps %xmm8,%xmm9
5505 65,15,89,193, //mulps %xmm9,%xmm0
5506 65,15,89,201, //mulps %xmm9,%xmm1
5507 65,15,89,209, //mulps %xmm9,%xmm2
5508 65,15,89,217, //mulps %xmm9,%xmm3
5509 72,173, //lods %ds:(%rsi),%rax
5510 255,224, //jmpq *%rax
5511};
5512
5513CODE const uint8_t sk_lerp_1_float_sse41[] = {
5514 72,173, //lods %ds:(%rsi),%rax
5515 243,68,15,16,0, //movss (%rax),%xmm8
5516 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5517 15,92,196, //subps %xmm4,%xmm0
5518 65,15,89,192, //mulps %xmm8,%xmm0
5519 15,88,196, //addps %xmm4,%xmm0
5520 15,92,205, //subps %xmm5,%xmm1
5521 65,15,89,200, //mulps %xmm8,%xmm1
5522 15,88,205, //addps %xmm5,%xmm1
5523 15,92,214, //subps %xmm6,%xmm2
5524 65,15,89,208, //mulps %xmm8,%xmm2
5525 15,88,214, //addps %xmm6,%xmm2
5526 15,92,223, //subps %xmm7,%xmm3
5527 65,15,89,216, //mulps %xmm8,%xmm3
5528 15,88,223, //addps %xmm7,%xmm3
5529 72,173, //lods %ds:(%rsi),%rax
5530 255,224, //jmpq *%rax
5531};
5532
5533CODE const uint8_t sk_lerp_u8_sse41[] = {
5534 72,173, //lods %ds:(%rsi),%rax
5535 72,139,0, //mov (%rax),%rax
5536 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
5537 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005538 184,129,128,128,59, //mov $0x3b808081,%eax
5539 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05005540 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5541 69,15,89,200, //mulps %xmm8,%xmm9
5542 15,92,196, //subps %xmm4,%xmm0
5543 65,15,89,193, //mulps %xmm9,%xmm0
5544 15,88,196, //addps %xmm4,%xmm0
5545 15,92,205, //subps %xmm5,%xmm1
5546 65,15,89,201, //mulps %xmm9,%xmm1
5547 15,88,205, //addps %xmm5,%xmm1
5548 15,92,214, //subps %xmm6,%xmm2
5549 65,15,89,209, //mulps %xmm9,%xmm2
5550 15,88,214, //addps %xmm6,%xmm2
5551 15,92,223, //subps %xmm7,%xmm3
5552 65,15,89,217, //mulps %xmm9,%xmm3
5553 15,88,223, //addps %xmm7,%xmm3
5554 72,173, //lods %ds:(%rsi),%rax
5555 255,224, //jmpq *%rax
5556};
5557
5558CODE const uint8_t sk_lerp_565_sse41[] = {
5559 72,173, //lods %ds:(%rsi),%rax
5560 72,139,0, //mov (%rax),%rax
5561 102,68,15,56,51,4,120, //pmovzxwd (%rax,%rdi,2),%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05005562 184,0,248,0,0, //mov $0xf800,%eax
5563 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005564 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5565 102,65,15,219,216, //pand %xmm8,%xmm3
5566 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05005567 184,8,33,132,55, //mov $0x37842108,%eax
5568 102,68,15,110,208, //movd %eax,%xmm10
5569 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5570 69,15,89,209, //mulps %xmm9,%xmm10
5571 184,224,7,0,0, //mov $0x7e0,%eax
5572 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005573 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5574 102,65,15,219,216, //pand %xmm8,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05005575 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
5576 184,33,8,2,58, //mov $0x3a020821,%eax
5577 102,68,15,110,216, //movd %eax,%xmm11
5578 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5579 69,15,89,217, //mulps %xmm9,%xmm11
5580 184,31,0,0,0, //mov $0x1f,%eax
5581 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005582 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5583 102,65,15,219,216, //pand %xmm8,%xmm3
5584 68,15,91,195, //cvtdq2ps %xmm3,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05005585 184,8,33,4,61, //mov $0x3d042108,%eax
5586 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005587 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5588 65,15,89,216, //mulps %xmm8,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005589 15,92,196, //subps %xmm4,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005590 65,15,89,194, //mulps %xmm10,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005591 15,88,196, //addps %xmm4,%xmm0
5592 15,92,205, //subps %xmm5,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05005593 65,15,89,203, //mulps %xmm11,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05005594 15,88,205, //addps %xmm5,%xmm1
5595 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005596 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005597 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005598 184,0,0,128,63, //mov $0x3f800000,%eax
5599 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005600 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5601 72,173, //lods %ds:(%rsi),%rax
5602 255,224, //jmpq *%rax
5603};
5604
5605CODE const uint8_t sk_load_tables_sse41[] = {
5606 72,173, //lods %ds:(%rsi),%rax
5607 72,139,8, //mov (%rax),%rcx
5608 76,139,64,8, //mov 0x8(%rax),%r8
5609 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05005610 185,255,0,0,0, //mov $0xff,%ecx
5611 102,15,110,193, //movd %ecx,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005612 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
5613 102,65,15,111,200, //movdqa %xmm8,%xmm1
5614 102,15,114,209,8, //psrld $0x8,%xmm1
5615 102,15,219,200, //pand %xmm0,%xmm1
5616 102,65,15,111,208, //movdqa %xmm8,%xmm2
5617 102,15,114,210,16, //psrld $0x10,%xmm2
5618 102,15,219,208, //pand %xmm0,%xmm2
5619 102,65,15,219,192, //pand %xmm8,%xmm0
5620 102,72,15,58,22,193,1, //pextrq $0x1,%xmm0,%rcx
5621 65,137,201, //mov %ecx,%r9d
5622 72,193,233,32, //shr $0x20,%rcx
5623 102,73,15,126,194, //movq %xmm0,%r10
5624 69,137,211, //mov %r10d,%r11d
5625 73,193,234,32, //shr $0x20,%r10
5626 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
5627 102,67,15,58,33,4,144,16, //insertps $0x10,(%r8,%r10,4),%xmm0
5628 102,67,15,58,33,4,136,32, //insertps $0x20,(%r8,%r9,4),%xmm0
5629 102,65,15,58,33,4,136,48, //insertps $0x30,(%r8,%rcx,4),%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07005630 76,139,64,16, //mov 0x10(%rax),%r8
5631 102,73,15,58,22,202,1, //pextrq $0x1,%xmm1,%r10
5632 77,137,209, //mov %r10,%r9
Mike Klein894d5612017-03-07 07:59:52 -05005633 73,193,233,32, //shr $0x20,%r9
Mike Klein64b97482017-03-14 17:35:04 -07005634 102,72,15,126,201, //movq %xmm1,%rcx
5635 65,137,203, //mov %ecx,%r11d
5636 65,129,227,255,255,255,0, //and $0xffffff,%r11d
5637 72,193,233,30, //shr $0x1e,%rcx
5638 65,129,226,255,255,255,0, //and $0xffffff,%r10d
5639 243,67,15,16,12,152, //movss (%r8,%r11,4),%xmm1
5640 102,65,15,58,33,12,8,16, //insertps $0x10,(%r8,%rcx,1),%xmm1
5641 243,67,15,16,28,144, //movss (%r8,%r10,4),%xmm3
5642 102,15,58,33,203,32, //insertps $0x20,%xmm3,%xmm1
5643 243,67,15,16,28,136, //movss (%r8,%r9,4),%xmm3
5644 102,15,58,33,203,48, //insertps $0x30,%xmm3,%xmm1
5645 76,139,72,24, //mov 0x18(%rax),%r9
5646 102,72,15,58,22,209,1, //pextrq $0x1,%xmm2,%rcx
5647 68,15,183,193, //movzwl %cx,%r8d
5648 72,193,233,32, //shr $0x20,%rcx
5649 102,72,15,126,208, //movq %xmm2,%rax
5650 68,15,183,208, //movzwl %ax,%r10d
5651 72,193,232,30, //shr $0x1e,%rax
5652 243,67,15,16,20,145, //movss (%r9,%r10,4),%xmm2
5653 102,65,15,58,33,20,1,16, //insertps $0x10,(%r9,%rax,1),%xmm2
5654 243,67,15,16,28,129, //movss (%r9,%r8,4),%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005655 102,15,58,33,211,32, //insertps $0x20,%xmm3,%xmm2
Mike Klein64b97482017-03-14 17:35:04 -07005656 243,65,15,16,28,137, //movss (%r9,%rcx,4),%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005657 102,15,58,33,211,48, //insertps $0x30,%xmm3,%xmm2
5658 102,65,15,114,208,24, //psrld $0x18,%xmm8
5659 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05005660 184,129,128,128,59, //mov $0x3b808081,%eax
5661 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005662 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5663 65,15,89,216, //mulps %xmm8,%xmm3
5664 72,173, //lods %ds:(%rsi),%rax
5665 255,224, //jmpq *%rax
5666};
5667
5668CODE const uint8_t sk_load_a8_sse41[] = {
5669 72,173, //lods %ds:(%rsi),%rax
5670 72,139,0, //mov (%rax),%rax
5671 102,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm0
5672 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005673 184,129,128,128,59, //mov $0x3b808081,%eax
5674 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005675 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5676 15,89,216, //mulps %xmm0,%xmm3
5677 72,173, //lods %ds:(%rsi),%rax
5678 15,87,192, //xorps %xmm0,%xmm0
5679 15,87,201, //xorps %xmm1,%xmm1
5680 15,87,210, //xorps %xmm2,%xmm2
5681 255,224, //jmpq *%rax
5682};
5683
5684CODE const uint8_t sk_store_a8_sse41[] = {
5685 72,173, //lods %ds:(%rsi),%rax
5686 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05005687 185,0,0,127,67, //mov $0x437f0000,%ecx
5688 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005689 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5690 68,15,89,195, //mulps %xmm3,%xmm8
5691 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
5692 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
5693 102,69,15,103,192, //packuswb %xmm8,%xmm8
5694 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
5695 72,173, //lods %ds:(%rsi),%rax
5696 255,224, //jmpq *%rax
5697};
5698
5699CODE const uint8_t sk_load_565_sse41[] = {
5700 72,173, //lods %ds:(%rsi),%rax
5701 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05005702 102,15,56,51,20,120, //pmovzxwd (%rax,%rdi,2),%xmm2
5703 184,0,248,0,0, //mov $0xf800,%eax
5704 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005705 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005706 102,15,219,194, //pand %xmm2,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005707 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05005708 184,8,33,132,55, //mov $0x37842108,%eax
5709 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005710 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5711 15,89,193, //mulps %xmm1,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005712 184,224,7,0,0, //mov $0x7e0,%eax
5713 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05005714 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05005715 102,15,219,202, //pand %xmm2,%xmm1
5716 15,91,217, //cvtdq2ps %xmm1,%xmm3
5717 184,33,8,2,58, //mov $0x3a020821,%eax
5718 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05005719 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05005720 15,89,203, //mulps %xmm3,%xmm1
5721 184,31,0,0,0, //mov $0x1f,%eax
5722 102,15,110,216, //movd %eax,%xmm3
5723 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5724 102,15,219,218, //pand %xmm2,%xmm3
5725 15,91,219, //cvtdq2ps %xmm3,%xmm3
5726 184,8,33,4,61, //mov $0x3d042108,%eax
5727 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005728 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -05005729 15,89,211, //mulps %xmm3,%xmm2
5730 184,0,0,128,63, //mov $0x3f800000,%eax
5731 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005732 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5733 72,173, //lods %ds:(%rsi),%rax
5734 255,224, //jmpq *%rax
5735};
5736
5737CODE const uint8_t sk_store_565_sse41[] = {
5738 72,173, //lods %ds:(%rsi),%rax
5739 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05005740 185,0,0,248,65, //mov $0x41f80000,%ecx
5741 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005742 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05005743 69,15,40,200, //movaps %xmm8,%xmm9
5744 68,15,89,200, //mulps %xmm0,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05005745 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05005746 102,65,15,114,241,11, //pslld $0xb,%xmm9
5747 185,0,0,124,66, //mov $0x427c0000,%ecx
5748 102,68,15,110,209, //movd %ecx,%xmm10
5749 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5750 68,15,89,209, //mulps %xmm1,%xmm10
5751 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
5752 102,65,15,114,242,5, //pslld $0x5,%xmm10
5753 102,69,15,235,209, //por %xmm9,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005754 68,15,89,194, //mulps %xmm2,%xmm8
5755 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05005756 102,69,15,86,194, //orpd %xmm10,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005757 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
5758 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
5759 72,173, //lods %ds:(%rsi),%rax
5760 255,224, //jmpq *%rax
5761};
5762
5763CODE const uint8_t sk_load_8888_sse41[] = {
5764 72,173, //lods %ds:(%rsi),%rax
5765 72,139,0, //mov (%rax),%rax
5766 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005767 184,255,0,0,0, //mov $0xff,%eax
5768 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005769 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
5770 102,15,111,203, //movdqa %xmm3,%xmm1
5771 102,15,114,209,8, //psrld $0x8,%xmm1
5772 102,15,219,200, //pand %xmm0,%xmm1
5773 102,15,111,211, //movdqa %xmm3,%xmm2
5774 102,15,114,210,16, //psrld $0x10,%xmm2
5775 102,15,219,208, //pand %xmm0,%xmm2
5776 102,15,219,195, //pand %xmm3,%xmm0
5777 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005778 184,129,128,128,59, //mov $0x3b808081,%eax
5779 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005780 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5781 65,15,89,192, //mulps %xmm8,%xmm0
5782 15,91,201, //cvtdq2ps %xmm1,%xmm1
5783 65,15,89,200, //mulps %xmm8,%xmm1
5784 15,91,210, //cvtdq2ps %xmm2,%xmm2
5785 65,15,89,208, //mulps %xmm8,%xmm2
5786 102,15,114,211,24, //psrld $0x18,%xmm3
5787 15,91,219, //cvtdq2ps %xmm3,%xmm3
5788 65,15,89,216, //mulps %xmm8,%xmm3
5789 72,173, //lods %ds:(%rsi),%rax
5790 255,224, //jmpq *%rax
5791};
5792
5793CODE const uint8_t sk_store_8888_sse41[] = {
5794 72,173, //lods %ds:(%rsi),%rax
5795 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005796 185,0,0,127,67, //mov $0x437f0000,%ecx
5797 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005798 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5799 69,15,40,200, //movaps %xmm8,%xmm9
5800 68,15,89,200, //mulps %xmm0,%xmm9
5801 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
5802 69,15,40,208, //movaps %xmm8,%xmm10
5803 68,15,89,209, //mulps %xmm1,%xmm10
5804 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
5805 102,65,15,114,242,8, //pslld $0x8,%xmm10
5806 102,69,15,235,209, //por %xmm9,%xmm10
5807 69,15,40,200, //movaps %xmm8,%xmm9
5808 68,15,89,202, //mulps %xmm2,%xmm9
5809 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
5810 102,65,15,114,241,16, //pslld $0x10,%xmm9
5811 68,15,89,195, //mulps %xmm3,%xmm8
5812 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
5813 102,65,15,114,240,24, //pslld $0x18,%xmm8
5814 102,69,15,235,193, //por %xmm9,%xmm8
5815 102,69,15,235,194, //por %xmm10,%xmm8
5816 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
5817 72,173, //lods %ds:(%rsi),%rax
5818 255,224, //jmpq *%rax
5819};
5820
5821CODE const uint8_t sk_load_f16_sse41[] = {
5822 72,173, //lods %ds:(%rsi),%rax
5823 72,139,0, //mov (%rax),%rax
5824 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
5825 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
5826 102,15,111,208, //movdqa %xmm0,%xmm2
5827 102,15,97,209, //punpcklwd %xmm1,%xmm2
5828 102,15,105,193, //punpckhwd %xmm1,%xmm0
5829 102,68,15,111,194, //movdqa %xmm2,%xmm8
5830 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
5831 102,15,105,208, //punpckhwd %xmm0,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -05005832 184,0,4,0,4, //mov $0x4000400,%eax
5833 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005834 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
5835 102,15,111,203, //movdqa %xmm3,%xmm1
5836 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
5837 102,65,15,223,200, //pandn %xmm8,%xmm1
5838 102,15,101,218, //pcmpgtw %xmm2,%xmm3
5839 102,15,223,218, //pandn %xmm2,%xmm3
5840 102,15,56,51,193, //pmovzxwd %xmm1,%xmm0
5841 102,15,114,240,13, //pslld $0xd,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05005842 184,0,0,128,119, //mov $0x77800000,%eax
5843 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005844 102,68,15,112,194,0, //pshufd $0x0,%xmm2,%xmm8
5845 65,15,89,192, //mulps %xmm8,%xmm0
5846 102,69,15,239,201, //pxor %xmm9,%xmm9
5847 102,65,15,105,201, //punpckhwd %xmm9,%xmm1
5848 102,15,114,241,13, //pslld $0xd,%xmm1
5849 65,15,89,200, //mulps %xmm8,%xmm1
5850 102,15,56,51,211, //pmovzxwd %xmm3,%xmm2
5851 102,15,114,242,13, //pslld $0xd,%xmm2
5852 65,15,89,208, //mulps %xmm8,%xmm2
5853 102,65,15,105,217, //punpckhwd %xmm9,%xmm3
5854 102,15,114,243,13, //pslld $0xd,%xmm3
5855 65,15,89,216, //mulps %xmm8,%xmm3
5856 72,173, //lods %ds:(%rsi),%rax
5857 255,224, //jmpq *%rax
5858};
5859
5860CODE const uint8_t sk_store_f16_sse41[] = {
5861 72,173, //lods %ds:(%rsi),%rax
5862 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05005863 185,0,0,128,7, //mov $0x7800000,%ecx
5864 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005865 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
5866 102,69,15,111,200, //movdqa %xmm8,%xmm9
5867 68,15,89,200, //mulps %xmm0,%xmm9
5868 102,65,15,114,209,13, //psrld $0xd,%xmm9
5869 102,69,15,111,208, //movdqa %xmm8,%xmm10
5870 68,15,89,209, //mulps %xmm1,%xmm10
5871 102,65,15,114,210,13, //psrld $0xd,%xmm10
5872 102,69,15,111,216, //movdqa %xmm8,%xmm11
5873 68,15,89,218, //mulps %xmm2,%xmm11
5874 102,65,15,114,211,13, //psrld $0xd,%xmm11
5875 68,15,89,195, //mulps %xmm3,%xmm8
5876 102,65,15,114,208,13, //psrld $0xd,%xmm8
5877 102,65,15,115,250,2, //pslldq $0x2,%xmm10
5878 102,69,15,235,209, //por %xmm9,%xmm10
5879 102,65,15,115,248,2, //pslldq $0x2,%xmm8
5880 102,69,15,235,195, //por %xmm11,%xmm8
5881 102,69,15,111,202, //movdqa %xmm10,%xmm9
5882 102,69,15,98,200, //punpckldq %xmm8,%xmm9
5883 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
5884 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
5885 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
5886 72,173, //lods %ds:(%rsi),%rax
5887 255,224, //jmpq *%rax
5888};
5889
5890CODE const uint8_t sk_store_f32_sse41[] = {
5891 72,173, //lods %ds:(%rsi),%rax
5892 72,139,0, //mov (%rax),%rax
5893 72,137,249, //mov %rdi,%rcx
5894 72,193,225,4, //shl $0x4,%rcx
5895 68,15,40,192, //movaps %xmm0,%xmm8
5896 68,15,40,200, //movaps %xmm0,%xmm9
5897 68,15,20,201, //unpcklps %xmm1,%xmm9
5898 68,15,40,210, //movaps %xmm2,%xmm10
5899 68,15,40,218, //movaps %xmm2,%xmm11
5900 68,15,20,219, //unpcklps %xmm3,%xmm11
5901 68,15,21,193, //unpckhps %xmm1,%xmm8
5902 68,15,21,211, //unpckhps %xmm3,%xmm10
5903 69,15,40,225, //movaps %xmm9,%xmm12
5904 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -07005905 69,15,18,217, //movhlps %xmm9,%xmm11
5906 69,15,40,200, //movaps %xmm8,%xmm9
5907 102,69,15,20,202, //unpcklpd %xmm10,%xmm9
5908 69,15,18,208, //movhlps %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005909 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
Mike Klein64b97482017-03-14 17:35:04 -07005910 68,15,17,92,8,16, //movups %xmm11,0x10(%rax,%rcx,1)
5911 102,68,15,17,76,8,32, //movupd %xmm9,0x20(%rax,%rcx,1)
5912 68,15,17,84,8,48, //movups %xmm10,0x30(%rax,%rcx,1)
Mike Klein894d5612017-03-07 07:59:52 -05005913 72,173, //lods %ds:(%rsi),%rax
5914 255,224, //jmpq *%rax
5915};
5916
5917CODE const uint8_t sk_clamp_x_sse41[] = {
5918 72,173, //lods %ds:(%rsi),%rax
5919 69,15,87,192, //xorps %xmm8,%xmm8
5920 68,15,95,192, //maxps %xmm0,%xmm8
5921 243,68,15,16,8, //movss (%rax),%xmm9
5922 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5923 102,15,118,192, //pcmpeqd %xmm0,%xmm0
5924 102,65,15,254,193, //paddd %xmm9,%xmm0
5925 68,15,93,192, //minps %xmm0,%xmm8
5926 72,173, //lods %ds:(%rsi),%rax
5927 65,15,40,192, //movaps %xmm8,%xmm0
5928 255,224, //jmpq *%rax
5929};
5930
5931CODE const uint8_t sk_clamp_y_sse41[] = {
5932 72,173, //lods %ds:(%rsi),%rax
5933 69,15,87,192, //xorps %xmm8,%xmm8
5934 68,15,95,193, //maxps %xmm1,%xmm8
5935 243,68,15,16,8, //movss (%rax),%xmm9
5936 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5937 102,15,118,201, //pcmpeqd %xmm1,%xmm1
5938 102,65,15,254,201, //paddd %xmm9,%xmm1
5939 68,15,93,193, //minps %xmm1,%xmm8
5940 72,173, //lods %ds:(%rsi),%rax
5941 65,15,40,200, //movaps %xmm8,%xmm1
5942 255,224, //jmpq *%rax
5943};
5944
5945CODE const uint8_t sk_repeat_x_sse41[] = {
5946 72,173, //lods %ds:(%rsi),%rax
5947 243,68,15,16,0, //movss (%rax),%xmm8
5948 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5949 68,15,40,200, //movaps %xmm0,%xmm9
5950 69,15,94,200, //divps %xmm8,%xmm9
5951 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
5952 69,15,89,200, //mulps %xmm8,%xmm9
5953 65,15,92,193, //subps %xmm9,%xmm0
5954 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
5955 102,69,15,254,200, //paddd %xmm8,%xmm9
5956 65,15,93,193, //minps %xmm9,%xmm0
5957 72,173, //lods %ds:(%rsi),%rax
5958 255,224, //jmpq *%rax
5959};
5960
5961CODE const uint8_t sk_repeat_y_sse41[] = {
5962 72,173, //lods %ds:(%rsi),%rax
5963 243,68,15,16,0, //movss (%rax),%xmm8
5964 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5965 68,15,40,201, //movaps %xmm1,%xmm9
5966 69,15,94,200, //divps %xmm8,%xmm9
5967 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
5968 69,15,89,200, //mulps %xmm8,%xmm9
5969 65,15,92,201, //subps %xmm9,%xmm1
5970 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
5971 102,69,15,254,200, //paddd %xmm8,%xmm9
5972 65,15,93,201, //minps %xmm9,%xmm1
5973 72,173, //lods %ds:(%rsi),%rax
5974 255,224, //jmpq *%rax
5975};
5976
5977CODE const uint8_t sk_mirror_x_sse41[] = {
5978 72,173, //lods %ds:(%rsi),%rax
5979 243,68,15,16,0, //movss (%rax),%xmm8
5980 69,15,40,200, //movaps %xmm8,%xmm9
5981 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5982 65,15,92,193, //subps %xmm9,%xmm0
5983 243,69,15,88,192, //addss %xmm8,%xmm8
5984 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5985 68,15,40,208, //movaps %xmm0,%xmm10
5986 69,15,94,208, //divps %xmm8,%xmm10
5987 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
5988 69,15,89,208, //mulps %xmm8,%xmm10
5989 65,15,92,194, //subps %xmm10,%xmm0
5990 65,15,92,193, //subps %xmm9,%xmm0
5991 69,15,87,192, //xorps %xmm8,%xmm8
5992 68,15,92,192, //subps %xmm0,%xmm8
5993 65,15,84,192, //andps %xmm8,%xmm0
5994 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
5995 102,69,15,254,193, //paddd %xmm9,%xmm8
5996 65,15,93,192, //minps %xmm8,%xmm0
5997 72,173, //lods %ds:(%rsi),%rax
5998 255,224, //jmpq *%rax
5999};
6000
6001CODE const uint8_t sk_mirror_y_sse41[] = {
6002 72,173, //lods %ds:(%rsi),%rax
6003 243,68,15,16,0, //movss (%rax),%xmm8
6004 69,15,40,200, //movaps %xmm8,%xmm9
6005 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6006 65,15,92,201, //subps %xmm9,%xmm1
6007 243,69,15,88,192, //addss %xmm8,%xmm8
6008 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6009 68,15,40,209, //movaps %xmm1,%xmm10
6010 69,15,94,208, //divps %xmm8,%xmm10
6011 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
6012 69,15,89,208, //mulps %xmm8,%xmm10
6013 65,15,92,202, //subps %xmm10,%xmm1
6014 65,15,92,201, //subps %xmm9,%xmm1
6015 69,15,87,192, //xorps %xmm8,%xmm8
6016 68,15,92,193, //subps %xmm1,%xmm8
6017 65,15,84,200, //andps %xmm8,%xmm1
6018 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
6019 102,69,15,254,193, //paddd %xmm9,%xmm8
6020 65,15,93,200, //minps %xmm8,%xmm1
6021 72,173, //lods %ds:(%rsi),%rax
6022 255,224, //jmpq *%rax
6023};
6024
Mike Kleine9ed07d2017-03-07 12:28:11 -05006025CODE const uint8_t sk_luminance_to_alpha_sse41[] = {
Mike Klein5224f462017-03-07 17:29:54 -05006026 184,208,179,89,62, //mov $0x3e59b3d0,%eax
6027 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05006028 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6029 15,89,216, //mulps %xmm0,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05006030 184,89,23,55,63, //mov $0x3f371759,%eax
6031 102,15,110,192, //movd %eax,%xmm0
6032 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6033 15,89,193, //mulps %xmm1,%xmm0
6034 15,88,195, //addps %xmm3,%xmm0
6035 184,152,221,147,61, //mov $0x3d93dd98,%eax
6036 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05006037 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6038 15,89,218, //mulps %xmm2,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05006039 15,88,216, //addps %xmm0,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05006040 72,173, //lods %ds:(%rsi),%rax
6041 15,87,192, //xorps %xmm0,%xmm0
6042 15,87,201, //xorps %xmm1,%xmm1
6043 15,87,210, //xorps %xmm2,%xmm2
6044 255,224, //jmpq *%rax
6045};
6046
Mike Klein894d5612017-03-07 07:59:52 -05006047CODE const uint8_t sk_matrix_2x3_sse41[] = {
6048 68,15,40,201, //movaps %xmm1,%xmm9
6049 68,15,40,192, //movaps %xmm0,%xmm8
6050 72,173, //lods %ds:(%rsi),%rax
6051 243,15,16,0, //movss (%rax),%xmm0
6052 243,15,16,72,4, //movss 0x4(%rax),%xmm1
6053 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6054 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
6055 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6056 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
6057 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6058 69,15,89,209, //mulps %xmm9,%xmm10
6059 69,15,88,211, //addps %xmm11,%xmm10
6060 65,15,89,192, //mulps %xmm8,%xmm0
6061 65,15,88,194, //addps %xmm10,%xmm0
6062 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6063 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
6064 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6065 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
6066 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6067 69,15,89,209, //mulps %xmm9,%xmm10
6068 69,15,88,211, //addps %xmm11,%xmm10
6069 65,15,89,200, //mulps %xmm8,%xmm1
6070 65,15,88,202, //addps %xmm10,%xmm1
6071 72,173, //lods %ds:(%rsi),%rax
6072 255,224, //jmpq *%rax
6073};
6074
6075CODE const uint8_t sk_matrix_3x4_sse41[] = {
6076 68,15,40,201, //movaps %xmm1,%xmm9
6077 68,15,40,192, //movaps %xmm0,%xmm8
6078 72,173, //lods %ds:(%rsi),%rax
6079 243,15,16,0, //movss (%rax),%xmm0
6080 243,15,16,72,4, //movss 0x4(%rax),%xmm1
6081 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6082 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
6083 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6084 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
6085 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6086 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
6087 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6088 68,15,89,218, //mulps %xmm2,%xmm11
6089 69,15,88,220, //addps %xmm12,%xmm11
6090 69,15,89,209, //mulps %xmm9,%xmm10
6091 69,15,88,211, //addps %xmm11,%xmm10
6092 65,15,89,192, //mulps %xmm8,%xmm0
6093 65,15,88,194, //addps %xmm10,%xmm0
6094 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6095 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
6096 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6097 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
6098 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6099 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
6100 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6101 68,15,89,218, //mulps %xmm2,%xmm11
6102 69,15,88,220, //addps %xmm12,%xmm11
6103 69,15,89,209, //mulps %xmm9,%xmm10
6104 69,15,88,211, //addps %xmm11,%xmm10
6105 65,15,89,200, //mulps %xmm8,%xmm1
6106 65,15,88,202, //addps %xmm10,%xmm1
6107 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
6108 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6109 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
6110 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6111 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
6112 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6113 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
6114 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6115 68,15,89,226, //mulps %xmm2,%xmm12
6116 69,15,88,229, //addps %xmm13,%xmm12
6117 69,15,89,217, //mulps %xmm9,%xmm11
6118 69,15,88,220, //addps %xmm12,%xmm11
6119 69,15,89,208, //mulps %xmm8,%xmm10
6120 69,15,88,211, //addps %xmm11,%xmm10
6121 72,173, //lods %ds:(%rsi),%rax
6122 65,15,40,210, //movaps %xmm10,%xmm2
6123 255,224, //jmpq *%rax
6124};
6125
Mike Kleine9ed07d2017-03-07 12:28:11 -05006126CODE const uint8_t sk_matrix_4x5_sse41[] = {
6127 68,15,40,201, //movaps %xmm1,%xmm9
6128 68,15,40,192, //movaps %xmm0,%xmm8
6129 72,173, //lods %ds:(%rsi),%rax
6130 243,15,16,0, //movss (%rax),%xmm0
6131 243,15,16,72,4, //movss 0x4(%rax),%xmm1
6132 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6133 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
6134 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6135 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
6136 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6137 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
6138 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6139 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
6140 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6141 68,15,89,227, //mulps %xmm3,%xmm12
6142 69,15,88,229, //addps %xmm13,%xmm12
6143 68,15,89,218, //mulps %xmm2,%xmm11
6144 69,15,88,220, //addps %xmm12,%xmm11
6145 69,15,89,209, //mulps %xmm9,%xmm10
6146 69,15,88,211, //addps %xmm11,%xmm10
6147 65,15,89,192, //mulps %xmm8,%xmm0
6148 65,15,88,194, //addps %xmm10,%xmm0
6149 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6150 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
6151 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6152 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
6153 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6154 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
6155 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6156 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
6157 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6158 68,15,89,227, //mulps %xmm3,%xmm12
6159 69,15,88,229, //addps %xmm13,%xmm12
6160 68,15,89,218, //mulps %xmm2,%xmm11
6161 69,15,88,220, //addps %xmm12,%xmm11
6162 69,15,89,209, //mulps %xmm9,%xmm10
6163 69,15,88,211, //addps %xmm11,%xmm10
6164 65,15,89,200, //mulps %xmm8,%xmm1
6165 65,15,88,202, //addps %xmm10,%xmm1
6166 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
6167 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6168 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
6169 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6170 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
6171 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6172 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
6173 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6174 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
6175 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
6176 68,15,89,235, //mulps %xmm3,%xmm13
6177 69,15,88,238, //addps %xmm14,%xmm13
6178 68,15,89,226, //mulps %xmm2,%xmm12
6179 69,15,88,229, //addps %xmm13,%xmm12
6180 69,15,89,217, //mulps %xmm9,%xmm11
6181 69,15,88,220, //addps %xmm12,%xmm11
6182 69,15,89,208, //mulps %xmm8,%xmm10
6183 69,15,88,211, //addps %xmm11,%xmm10
6184 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
6185 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6186 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
6187 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6188 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
6189 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6190 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
6191 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
6192 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
6193 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
6194 68,15,89,243, //mulps %xmm3,%xmm14
6195 69,15,88,247, //addps %xmm15,%xmm14
6196 68,15,89,234, //mulps %xmm2,%xmm13
6197 69,15,88,238, //addps %xmm14,%xmm13
6198 69,15,89,225, //mulps %xmm9,%xmm12
6199 69,15,88,229, //addps %xmm13,%xmm12
6200 69,15,89,216, //mulps %xmm8,%xmm11
6201 69,15,88,220, //addps %xmm12,%xmm11
6202 72,173, //lods %ds:(%rsi),%rax
6203 65,15,40,210, //movaps %xmm10,%xmm2
6204 65,15,40,219, //movaps %xmm11,%xmm3
6205 255,224, //jmpq *%rax
6206};
6207
Mike Klein894d5612017-03-07 07:59:52 -05006208CODE const uint8_t sk_matrix_perspective_sse41[] = {
6209 68,15,40,192, //movaps %xmm0,%xmm8
6210 72,173, //lods %ds:(%rsi),%rax
6211 243,15,16,0, //movss (%rax),%xmm0
6212 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
6213 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6214 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6215 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
6216 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6217 68,15,89,201, //mulps %xmm1,%xmm9
6218 69,15,88,202, //addps %xmm10,%xmm9
6219 65,15,89,192, //mulps %xmm8,%xmm0
6220 65,15,88,193, //addps %xmm9,%xmm0
6221 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
6222 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6223 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
6224 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6225 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
6226 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6227 68,15,89,209, //mulps %xmm1,%xmm10
6228 69,15,88,211, //addps %xmm11,%xmm10
6229 69,15,89,200, //mulps %xmm8,%xmm9
6230 69,15,88,202, //addps %xmm10,%xmm9
6231 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
6232 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6233 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
6234 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6235 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
6236 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6237 68,15,89,217, //mulps %xmm1,%xmm11
6238 69,15,88,220, //addps %xmm12,%xmm11
6239 69,15,89,208, //mulps %xmm8,%xmm10
6240 69,15,88,211, //addps %xmm11,%xmm10
6241 65,15,83,202, //rcpps %xmm10,%xmm1
6242 15,89,193, //mulps %xmm1,%xmm0
6243 68,15,89,201, //mulps %xmm1,%xmm9
6244 72,173, //lods %ds:(%rsi),%rax
6245 65,15,40,201, //movaps %xmm9,%xmm1
6246 255,224, //jmpq *%rax
6247};
6248
6249CODE const uint8_t sk_linear_gradient_2stops_sse41[] = {
6250 72,173, //lods %ds:(%rsi),%rax
6251 68,15,16,8, //movups (%rax),%xmm9
6252 15,16,88,16, //movups 0x10(%rax),%xmm3
6253 68,15,40,195, //movaps %xmm3,%xmm8
6254 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6255 65,15,40,201, //movaps %xmm9,%xmm1
6256 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6257 68,15,89,192, //mulps %xmm0,%xmm8
6258 68,15,88,193, //addps %xmm1,%xmm8
6259 15,40,203, //movaps %xmm3,%xmm1
6260 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
6261 65,15,40,209, //movaps %xmm9,%xmm2
6262 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
6263 15,89,200, //mulps %xmm0,%xmm1
6264 15,88,202, //addps %xmm2,%xmm1
6265 15,40,211, //movaps %xmm3,%xmm2
6266 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
6267 69,15,40,209, //movaps %xmm9,%xmm10
6268 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
6269 15,89,208, //mulps %xmm0,%xmm2
6270 65,15,88,210, //addps %xmm10,%xmm2
6271 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
6272 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
6273 15,89,216, //mulps %xmm0,%xmm3
6274 65,15,88,217, //addps %xmm9,%xmm3
6275 72,173, //lods %ds:(%rsi),%rax
6276 65,15,40,192, //movaps %xmm8,%xmm0
6277 255,224, //jmpq *%rax
6278};
6279
6280CODE const uint8_t sk_start_pipeline_sse2[] = {
6281 65,87, //push %r15
6282 65,86, //push %r14
6283 65,85, //push %r13
6284 65,84, //push %r12
6285 83, //push %rbx
6286 73,137,207, //mov %rcx,%r15
6287 73,137,214, //mov %rdx,%r14
6288 72,137,251, //mov %rdi,%rbx
6289 72,173, //lods %ds:(%rsi),%rax
6290 73,137,196, //mov %rax,%r12
6291 73,137,245, //mov %rsi,%r13
6292 72,141,67,4, //lea 0x4(%rbx),%rax
6293 76,57,248, //cmp %r15,%rax
6294 118,5, //jbe 28 <_sk_start_pipeline_sse2+0x28>
6295 72,137,216, //mov %rbx,%rax
6296 235,52, //jmp 5c <_sk_start_pipeline_sse2+0x5c>
6297 15,87,192, //xorps %xmm0,%xmm0
6298 15,87,201, //xorps %xmm1,%xmm1
6299 15,87,210, //xorps %xmm2,%xmm2
6300 15,87,219, //xorps %xmm3,%xmm3
6301 15,87,228, //xorps %xmm4,%xmm4
6302 15,87,237, //xorps %xmm5,%xmm5
6303 15,87,246, //xorps %xmm6,%xmm6
6304 15,87,255, //xorps %xmm7,%xmm7
6305 72,137,223, //mov %rbx,%rdi
6306 76,137,238, //mov %r13,%rsi
6307 76,137,242, //mov %r14,%rdx
6308 65,255,212, //callq *%r12
6309 72,141,67,4, //lea 0x4(%rbx),%rax
6310 72,131,195,8, //add $0x8,%rbx
6311 76,57,251, //cmp %r15,%rbx
6312 72,137,195, //mov %rax,%rbx
6313 118,204, //jbe 28 <_sk_start_pipeline_sse2+0x28>
6314 91, //pop %rbx
6315 65,92, //pop %r12
6316 65,93, //pop %r13
6317 65,94, //pop %r14
6318 65,95, //pop %r15
6319 195, //retq
6320};
6321
6322CODE const uint8_t sk_just_return_sse2[] = {
6323 195, //retq
6324};
6325
6326CODE const uint8_t sk_seed_shader_sse2[] = {
6327 72,173, //lods %ds:(%rsi),%rax
6328 102,15,110,199, //movd %edi,%xmm0
6329 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6330 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006331 185,0,0,0,63, //mov $0x3f000000,%ecx
6332 102,15,110,209, //movd %ecx,%xmm2
6333 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6334 15,88,202, //addps %xmm2,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05006335 15,16,2, //movups (%rdx),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006336 15,88,193, //addps %xmm1,%xmm0
6337 102,15,110,8, //movd (%rax),%xmm1
6338 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
6339 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006340 15,88,202, //addps %xmm2,%xmm1
6341 184,0,0,128,63, //mov $0x3f800000,%eax
6342 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006343 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6344 72,173, //lods %ds:(%rsi),%rax
6345 15,87,219, //xorps %xmm3,%xmm3
6346 15,87,228, //xorps %xmm4,%xmm4
6347 15,87,237, //xorps %xmm5,%xmm5
6348 15,87,246, //xorps %xmm6,%xmm6
6349 15,87,255, //xorps %xmm7,%xmm7
6350 255,224, //jmpq *%rax
6351};
6352
6353CODE const uint8_t sk_constant_color_sse2[] = {
6354 72,173, //lods %ds:(%rsi),%rax
6355 15,16,24, //movups (%rax),%xmm3
6356 15,40,195, //movaps %xmm3,%xmm0
6357 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6358 15,40,203, //movaps %xmm3,%xmm1
6359 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
6360 15,40,211, //movaps %xmm3,%xmm2
6361 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
6362 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
6363 72,173, //lods %ds:(%rsi),%rax
6364 255,224, //jmpq *%rax
6365};
6366
6367CODE const uint8_t sk_clear_sse2[] = {
6368 72,173, //lods %ds:(%rsi),%rax
6369 15,87,192, //xorps %xmm0,%xmm0
6370 15,87,201, //xorps %xmm1,%xmm1
6371 15,87,210, //xorps %xmm2,%xmm2
6372 15,87,219, //xorps %xmm3,%xmm3
6373 255,224, //jmpq *%rax
6374};
6375
6376CODE const uint8_t sk_plus__sse2[] = {
6377 15,88,196, //addps %xmm4,%xmm0
6378 15,88,205, //addps %xmm5,%xmm1
6379 15,88,214, //addps %xmm6,%xmm2
6380 15,88,223, //addps %xmm7,%xmm3
6381 72,173, //lods %ds:(%rsi),%rax
6382 255,224, //jmpq *%rax
6383};
6384
6385CODE const uint8_t sk_srcover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006386 184,0,0,128,63, //mov $0x3f800000,%eax
6387 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006388 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6389 68,15,92,195, //subps %xmm3,%xmm8
6390 69,15,40,200, //movaps %xmm8,%xmm9
6391 68,15,89,204, //mulps %xmm4,%xmm9
6392 65,15,88,193, //addps %xmm9,%xmm0
6393 69,15,40,200, //movaps %xmm8,%xmm9
6394 68,15,89,205, //mulps %xmm5,%xmm9
6395 65,15,88,201, //addps %xmm9,%xmm1
6396 69,15,40,200, //movaps %xmm8,%xmm9
6397 68,15,89,206, //mulps %xmm6,%xmm9
6398 65,15,88,209, //addps %xmm9,%xmm2
6399 68,15,89,199, //mulps %xmm7,%xmm8
6400 65,15,88,216, //addps %xmm8,%xmm3
6401 72,173, //lods %ds:(%rsi),%rax
6402 255,224, //jmpq *%rax
6403};
6404
6405CODE const uint8_t sk_dstover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006406 184,0,0,128,63, //mov $0x3f800000,%eax
6407 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006408 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6409 68,15,92,199, //subps %xmm7,%xmm8
6410 65,15,89,192, //mulps %xmm8,%xmm0
6411 15,88,196, //addps %xmm4,%xmm0
6412 65,15,89,200, //mulps %xmm8,%xmm1
6413 15,88,205, //addps %xmm5,%xmm1
6414 65,15,89,208, //mulps %xmm8,%xmm2
6415 15,88,214, //addps %xmm6,%xmm2
6416 65,15,89,216, //mulps %xmm8,%xmm3
6417 15,88,223, //addps %xmm7,%xmm3
6418 72,173, //lods %ds:(%rsi),%rax
6419 255,224, //jmpq *%rax
6420};
6421
6422CODE const uint8_t sk_clamp_0_sse2[] = {
6423 69,15,87,192, //xorps %xmm8,%xmm8
6424 65,15,95,192, //maxps %xmm8,%xmm0
6425 65,15,95,200, //maxps %xmm8,%xmm1
6426 65,15,95,208, //maxps %xmm8,%xmm2
6427 65,15,95,216, //maxps %xmm8,%xmm3
6428 72,173, //lods %ds:(%rsi),%rax
6429 255,224, //jmpq *%rax
6430};
6431
6432CODE const uint8_t sk_clamp_1_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006433 184,0,0,128,63, //mov $0x3f800000,%eax
6434 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006435 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6436 65,15,93,192, //minps %xmm8,%xmm0
6437 65,15,93,200, //minps %xmm8,%xmm1
6438 65,15,93,208, //minps %xmm8,%xmm2
6439 65,15,93,216, //minps %xmm8,%xmm3
6440 72,173, //lods %ds:(%rsi),%rax
6441 255,224, //jmpq *%rax
6442};
6443
6444CODE const uint8_t sk_clamp_a_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006445 184,0,0,128,63, //mov $0x3f800000,%eax
6446 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006447 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6448 65,15,93,216, //minps %xmm8,%xmm3
6449 15,93,195, //minps %xmm3,%xmm0
6450 15,93,203, //minps %xmm3,%xmm1
6451 15,93,211, //minps %xmm3,%xmm2
6452 72,173, //lods %ds:(%rsi),%rax
6453 255,224, //jmpq *%rax
6454};
6455
6456CODE const uint8_t sk_set_rgb_sse2[] = {
6457 72,173, //lods %ds:(%rsi),%rax
6458 243,15,16,0, //movss (%rax),%xmm0
6459 243,15,16,72,4, //movss 0x4(%rax),%xmm1
6460 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6461 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6462 243,15,16,80,8, //movss 0x8(%rax),%xmm2
6463 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6464 72,173, //lods %ds:(%rsi),%rax
6465 255,224, //jmpq *%rax
6466};
6467
6468CODE const uint8_t sk_swap_rb_sse2[] = {
6469 68,15,40,192, //movaps %xmm0,%xmm8
6470 72,173, //lods %ds:(%rsi),%rax
6471 15,40,194, //movaps %xmm2,%xmm0
6472 65,15,40,208, //movaps %xmm8,%xmm2
6473 255,224, //jmpq *%rax
6474};
6475
6476CODE const uint8_t sk_swap_sse2[] = {
6477 68,15,40,195, //movaps %xmm3,%xmm8
6478 68,15,40,202, //movaps %xmm2,%xmm9
6479 68,15,40,209, //movaps %xmm1,%xmm10
6480 68,15,40,216, //movaps %xmm0,%xmm11
6481 72,173, //lods %ds:(%rsi),%rax
6482 15,40,196, //movaps %xmm4,%xmm0
6483 15,40,205, //movaps %xmm5,%xmm1
6484 15,40,214, //movaps %xmm6,%xmm2
6485 15,40,223, //movaps %xmm7,%xmm3
6486 65,15,40,227, //movaps %xmm11,%xmm4
6487 65,15,40,234, //movaps %xmm10,%xmm5
6488 65,15,40,241, //movaps %xmm9,%xmm6
6489 65,15,40,248, //movaps %xmm8,%xmm7
6490 255,224, //jmpq *%rax
6491};
6492
6493CODE const uint8_t sk_move_src_dst_sse2[] = {
6494 72,173, //lods %ds:(%rsi),%rax
6495 15,40,224, //movaps %xmm0,%xmm4
6496 15,40,233, //movaps %xmm1,%xmm5
6497 15,40,242, //movaps %xmm2,%xmm6
6498 15,40,251, //movaps %xmm3,%xmm7
6499 255,224, //jmpq *%rax
6500};
6501
6502CODE const uint8_t sk_move_dst_src_sse2[] = {
6503 72,173, //lods %ds:(%rsi),%rax
6504 15,40,196, //movaps %xmm4,%xmm0
6505 15,40,205, //movaps %xmm5,%xmm1
6506 15,40,214, //movaps %xmm6,%xmm2
6507 15,40,223, //movaps %xmm7,%xmm3
6508 255,224, //jmpq *%rax
6509};
6510
6511CODE const uint8_t sk_premul_sse2[] = {
6512 15,89,195, //mulps %xmm3,%xmm0
6513 15,89,203, //mulps %xmm3,%xmm1
6514 15,89,211, //mulps %xmm3,%xmm2
6515 72,173, //lods %ds:(%rsi),%rax
6516 255,224, //jmpq *%rax
6517};
6518
6519CODE const uint8_t sk_unpremul_sse2[] = {
6520 69,15,87,192, //xorps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006521 184,0,0,128,63, //mov $0x3f800000,%eax
6522 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006523 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6524 68,15,94,203, //divps %xmm3,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07006525 68,15,194,195,4, //cmpneqps %xmm3,%xmm8
6526 69,15,84,193, //andps %xmm9,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006527 65,15,89,192, //mulps %xmm8,%xmm0
6528 65,15,89,200, //mulps %xmm8,%xmm1
6529 65,15,89,208, //mulps %xmm8,%xmm2
6530 72,173, //lods %ds:(%rsi),%rax
6531 255,224, //jmpq *%rax
6532};
6533
6534CODE const uint8_t sk_from_srgb_sse2[] = {
Mike Klein5224f462017-03-07 17:29:54 -05006535 184,145,131,158,61, //mov $0x3d9e8391,%eax
6536 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006537 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6538 69,15,40,232, //movaps %xmm8,%xmm13
6539 68,15,89,232, //mulps %xmm0,%xmm13
6540 68,15,40,224, //movaps %xmm0,%xmm12
6541 69,15,89,228, //mulps %xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -05006542 184,154,153,153,62, //mov $0x3e99999a,%eax
6543 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006544 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05006545 184,92,143,50,63, //mov $0x3f328f5c,%eax
6546 102,68,15,110,208, //movd %eax,%xmm10
6547 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05006548 69,15,40,241, //movaps %xmm9,%xmm14
6549 68,15,89,240, //mulps %xmm0,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -05006550 69,15,88,242, //addps %xmm10,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -05006551 184,10,215,35,59, //mov $0x3b23d70a,%eax
6552 102,68,15,110,216, //movd %eax,%xmm11
6553 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6554 69,15,89,244, //mulps %xmm12,%xmm14
6555 69,15,88,243, //addps %xmm11,%xmm14
6556 184,174,71,97,61, //mov $0x3d6147ae,%eax
6557 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05006558 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6559 65,15,194,196,1, //cmpltps %xmm12,%xmm0
6560 68,15,84,232, //andps %xmm0,%xmm13
6561 65,15,85,198, //andnps %xmm14,%xmm0
6562 65,15,86,197, //orps %xmm13,%xmm0
6563 69,15,40,232, //movaps %xmm8,%xmm13
6564 68,15,89,233, //mulps %xmm1,%xmm13
6565 68,15,40,241, //movaps %xmm1,%xmm14
6566 69,15,89,246, //mulps %xmm14,%xmm14
6567 69,15,40,249, //movaps %xmm9,%xmm15
6568 68,15,89,249, //mulps %xmm1,%xmm15
Mike Klein894d5612017-03-07 07:59:52 -05006569 69,15,88,250, //addps %xmm10,%xmm15
Mike Klein5224f462017-03-07 17:29:54 -05006570 69,15,89,254, //mulps %xmm14,%xmm15
6571 69,15,88,251, //addps %xmm11,%xmm15
Mike Klein894d5612017-03-07 07:59:52 -05006572 65,15,194,204,1, //cmpltps %xmm12,%xmm1
6573 68,15,84,233, //andps %xmm1,%xmm13
6574 65,15,85,207, //andnps %xmm15,%xmm1
6575 65,15,86,205, //orps %xmm13,%xmm1
6576 68,15,89,194, //mulps %xmm2,%xmm8
6577 68,15,40,234, //movaps %xmm2,%xmm13
6578 69,15,89,237, //mulps %xmm13,%xmm13
6579 68,15,89,202, //mulps %xmm2,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006580 69,15,88,202, //addps %xmm10,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05006581 69,15,89,205, //mulps %xmm13,%xmm9
6582 69,15,88,203, //addps %xmm11,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006583 65,15,194,212,1, //cmpltps %xmm12,%xmm2
6584 68,15,84,194, //andps %xmm2,%xmm8
6585 65,15,85,209, //andnps %xmm9,%xmm2
6586 65,15,86,208, //orps %xmm8,%xmm2
6587 72,173, //lods %ds:(%rsi),%rax
6588 255,224, //jmpq *%rax
6589};
6590
6591CODE const uint8_t sk_to_srgb_sse2[] = {
Mike Klein894d5612017-03-07 07:59:52 -05006592 68,15,82,192, //rsqrtps %xmm0,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05006593 69,15,83,248, //rcpps %xmm8,%xmm15
6594 69,15,82,232, //rsqrtps %xmm8,%xmm13
6595 184,41,92,71,65, //mov $0x41475c29,%eax
6596 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006597 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6598 69,15,40,240, //movaps %xmm8,%xmm14
6599 68,15,89,240, //mulps %xmm0,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -05006600 184,0,0,128,63, //mov $0x3f800000,%eax
6601 102,68,15,110,200, //movd %eax,%xmm9
6602 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6603 184,194,135,210,62, //mov $0x3ed287c2,%eax
6604 102,68,15,110,208, //movd %eax,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05006605 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
Mike Klein5224f462017-03-07 17:29:54 -05006606 184,206,111,48,63, //mov $0x3f306fce,%eax
6607 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05006608 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
Mike Klein5224f462017-03-07 17:29:54 -05006609 184,168,87,202,61, //mov $0x3dca57a8,%eax
6610 53,0,0,0,128, //xor $0x80000000,%eax
6611 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05006612 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -05006613 69,15,89,251, //mulps %xmm11,%xmm15
6614 69,15,88,252, //addps %xmm12,%xmm15
6615 69,15,89,234, //mulps %xmm10,%xmm13
6616 69,15,88,239, //addps %xmm15,%xmm13
6617 69,15,40,249, //movaps %xmm9,%xmm15
6618 69,15,93,253, //minps %xmm13,%xmm15
6619 184,4,231,140,59, //mov $0x3b8ce704,%eax
6620 102,68,15,110,232, //movd %eax,%xmm13
Mike Klein894d5612017-03-07 07:59:52 -05006621 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6622 65,15,194,197,1, //cmpltps %xmm13,%xmm0
6623 68,15,84,240, //andps %xmm0,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -05006624 65,15,85,199, //andnps %xmm15,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006625 65,15,86,198, //orps %xmm14,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05006626 68,15,82,241, //rsqrtps %xmm1,%xmm14
6627 69,15,83,254, //rcpps %xmm14,%xmm15
6628 69,15,82,246, //rsqrtps %xmm14,%xmm14
6629 69,15,89,251, //mulps %xmm11,%xmm15
6630 69,15,88,252, //addps %xmm12,%xmm15
6631 69,15,89,242, //mulps %xmm10,%xmm14
6632 69,15,88,247, //addps %xmm15,%xmm14
6633 69,15,40,249, //movaps %xmm9,%xmm15
6634 69,15,93,254, //minps %xmm14,%xmm15
6635 69,15,40,240, //movaps %xmm8,%xmm14
6636 68,15,89,241, //mulps %xmm1,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -05006637 65,15,194,205,1, //cmpltps %xmm13,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05006638 68,15,84,241, //andps %xmm1,%xmm14
6639 65,15,85,207, //andnps %xmm15,%xmm1
6640 65,15,86,206, //orps %xmm14,%xmm1
6641 68,15,82,242, //rsqrtps %xmm2,%xmm14
6642 69,15,83,254, //rcpps %xmm14,%xmm15
6643 69,15,89,251, //mulps %xmm11,%xmm15
6644 69,15,88,252, //addps %xmm12,%xmm15
6645 69,15,82,222, //rsqrtps %xmm14,%xmm11
6646 69,15,89,218, //mulps %xmm10,%xmm11
6647 69,15,88,223, //addps %xmm15,%xmm11
6648 69,15,93,203, //minps %xmm11,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006649 68,15,89,194, //mulps %xmm2,%xmm8
6650 65,15,194,213,1, //cmpltps %xmm13,%xmm2
6651 68,15,84,194, //andps %xmm2,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05006652 65,15,85,209, //andnps %xmm9,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006653 65,15,86,208, //orps %xmm8,%xmm2
6654 72,173, //lods %ds:(%rsi),%rax
Mike Klein894d5612017-03-07 07:59:52 -05006655 255,224, //jmpq *%rax
6656};
6657
6658CODE const uint8_t sk_scale_1_float_sse2[] = {
6659 72,173, //lods %ds:(%rsi),%rax
6660 243,68,15,16,0, //movss (%rax),%xmm8
6661 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6662 65,15,89,192, //mulps %xmm8,%xmm0
6663 65,15,89,200, //mulps %xmm8,%xmm1
6664 65,15,89,208, //mulps %xmm8,%xmm2
6665 65,15,89,216, //mulps %xmm8,%xmm3
6666 72,173, //lods %ds:(%rsi),%rax
6667 255,224, //jmpq *%rax
6668};
6669
6670CODE const uint8_t sk_scale_u8_sse2[] = {
6671 72,173, //lods %ds:(%rsi),%rax
6672 72,139,0, //mov (%rax),%rax
6673 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
6674 102,69,15,239,201, //pxor %xmm9,%xmm9
6675 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
6676 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
6677 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006678 184,129,128,128,59, //mov $0x3b808081,%eax
6679 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006680 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6681 69,15,89,200, //mulps %xmm8,%xmm9
6682 65,15,89,193, //mulps %xmm9,%xmm0
6683 65,15,89,201, //mulps %xmm9,%xmm1
6684 65,15,89,209, //mulps %xmm9,%xmm2
6685 65,15,89,217, //mulps %xmm9,%xmm3
6686 72,173, //lods %ds:(%rsi),%rax
6687 255,224, //jmpq *%rax
6688};
6689
6690CODE const uint8_t sk_lerp_1_float_sse2[] = {
6691 72,173, //lods %ds:(%rsi),%rax
6692 243,68,15,16,0, //movss (%rax),%xmm8
6693 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6694 15,92,196, //subps %xmm4,%xmm0
6695 65,15,89,192, //mulps %xmm8,%xmm0
6696 15,88,196, //addps %xmm4,%xmm0
6697 15,92,205, //subps %xmm5,%xmm1
6698 65,15,89,200, //mulps %xmm8,%xmm1
6699 15,88,205, //addps %xmm5,%xmm1
6700 15,92,214, //subps %xmm6,%xmm2
6701 65,15,89,208, //mulps %xmm8,%xmm2
6702 15,88,214, //addps %xmm6,%xmm2
6703 15,92,223, //subps %xmm7,%xmm3
6704 65,15,89,216, //mulps %xmm8,%xmm3
6705 15,88,223, //addps %xmm7,%xmm3
6706 72,173, //lods %ds:(%rsi),%rax
6707 255,224, //jmpq *%rax
6708};
6709
6710CODE const uint8_t sk_lerp_u8_sse2[] = {
6711 72,173, //lods %ds:(%rsi),%rax
6712 72,139,0, //mov (%rax),%rax
6713 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
6714 102,69,15,239,201, //pxor %xmm9,%xmm9
6715 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
6716 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
6717 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006718 184,129,128,128,59, //mov $0x3b808081,%eax
6719 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006720 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6721 69,15,89,200, //mulps %xmm8,%xmm9
6722 15,92,196, //subps %xmm4,%xmm0
6723 65,15,89,193, //mulps %xmm9,%xmm0
6724 15,88,196, //addps %xmm4,%xmm0
6725 15,92,205, //subps %xmm5,%xmm1
6726 65,15,89,201, //mulps %xmm9,%xmm1
6727 15,88,205, //addps %xmm5,%xmm1
6728 15,92,214, //subps %xmm6,%xmm2
6729 65,15,89,209, //mulps %xmm9,%xmm2
6730 15,88,214, //addps %xmm6,%xmm2
6731 15,92,223, //subps %xmm7,%xmm3
6732 65,15,89,217, //mulps %xmm9,%xmm3
6733 15,88,223, //addps %xmm7,%xmm3
6734 72,173, //lods %ds:(%rsi),%rax
6735 255,224, //jmpq *%rax
6736};
6737
6738CODE const uint8_t sk_lerp_565_sse2[] = {
6739 72,173, //lods %ds:(%rsi),%rax
6740 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05006741 243,68,15,126,4,120, //movq (%rax,%rdi,2),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006742 102,15,239,219, //pxor %xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05006743 102,68,15,97,195, //punpcklwd %xmm3,%xmm8
6744 184,0,248,0,0, //mov $0xf800,%eax
6745 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006746 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05006747 102,65,15,219,216, //pand %xmm8,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006748 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05006749 184,8,33,132,55, //mov $0x37842108,%eax
6750 102,68,15,110,208, //movd %eax,%xmm10
6751 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6752 69,15,89,209, //mulps %xmm9,%xmm10
6753 184,224,7,0,0, //mov $0x7e0,%eax
6754 102,15,110,216, //movd %eax,%xmm3
6755 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
6756 102,65,15,219,216, //pand %xmm8,%xmm3
6757 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
6758 184,33,8,2,58, //mov $0x3a020821,%eax
6759 102,68,15,110,216, //movd %eax,%xmm11
6760 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6761 69,15,89,217, //mulps %xmm9,%xmm11
6762 184,31,0,0,0, //mov $0x1f,%eax
6763 102,15,110,216, //movd %eax,%xmm3
6764 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
6765 102,65,15,219,216, //pand %xmm8,%xmm3
6766 68,15,91,195, //cvtdq2ps %xmm3,%xmm8
6767 184,8,33,4,61, //mov $0x3d042108,%eax
6768 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006769 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05006770 65,15,89,216, //mulps %xmm8,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006771 15,92,196, //subps %xmm4,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05006772 65,15,89,194, //mulps %xmm10,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006773 15,88,196, //addps %xmm4,%xmm0
6774 15,92,205, //subps %xmm5,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05006775 65,15,89,203, //mulps %xmm11,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05006776 15,88,205, //addps %xmm5,%xmm1
6777 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006778 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006779 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006780 184,0,0,128,63, //mov $0x3f800000,%eax
6781 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006782 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6783 72,173, //lods %ds:(%rsi),%rax
6784 255,224, //jmpq *%rax
6785};
6786
6787CODE const uint8_t sk_load_tables_sse2[] = {
6788 72,173, //lods %ds:(%rsi),%rax
6789 72,139,8, //mov (%rax),%rcx
6790 76,139,64,8, //mov 0x8(%rax),%r8
6791 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05006792 185,255,0,0,0, //mov $0xff,%ecx
6793 102,15,110,193, //movd %ecx,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006794 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6795 102,69,15,111,200, //movdqa %xmm8,%xmm9
6796 102,65,15,114,209,8, //psrld $0x8,%xmm9
6797 102,68,15,219,200, //pand %xmm0,%xmm9
6798 102,69,15,111,208, //movdqa %xmm8,%xmm10
6799 102,65,15,114,210,16, //psrld $0x10,%xmm10
6800 102,68,15,219,208, //pand %xmm0,%xmm10
6801 102,65,15,219,192, //pand %xmm8,%xmm0
6802 102,15,112,216,78, //pshufd $0x4e,%xmm0,%xmm3
6803 102,72,15,126,217, //movq %xmm3,%rcx
6804 65,137,201, //mov %ecx,%r9d
6805 72,193,233,32, //shr $0x20,%rcx
6806 102,73,15,126,194, //movq %xmm0,%r10
6807 69,137,211, //mov %r10d,%r11d
6808 73,193,234,32, //shr $0x20,%r10
6809 243,67,15,16,28,144, //movss (%r8,%r10,4),%xmm3
6810 243,65,15,16,4,136, //movss (%r8,%rcx,4),%xmm0
6811 15,20,216, //unpcklps %xmm0,%xmm3
6812 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
6813 243,67,15,16,12,136, //movss (%r8,%r9,4),%xmm1
6814 15,20,193, //unpcklps %xmm1,%xmm0
6815 15,20,195, //unpcklps %xmm3,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07006816 76,139,64,16, //mov 0x10(%rax),%r8
Mike Klein894d5612017-03-07 07:59:52 -05006817 102,65,15,112,201,78, //pshufd $0x4e,%xmm9,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07006818 102,73,15,126,202, //movq %xmm1,%r10
6819 77,137,209, //mov %r10,%r9
6820 73,193,233,32, //shr $0x20,%r9
6821 102,76,15,126,201, //movq %xmm9,%rcx
6822 65,137,203, //mov %ecx,%r11d
6823 65,129,227,255,255,255,0, //and $0xffffff,%r11d
6824 72,193,233,30, //shr $0x1e,%rcx
6825 65,129,226,255,255,255,0, //and $0xffffff,%r10d
6826 243,65,15,16,28,8, //movss (%r8,%rcx,1),%xmm3
6827 243,67,15,16,12,136, //movss (%r8,%r9,4),%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05006828 15,20,217, //unpcklps %xmm1,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07006829 243,67,15,16,12,152, //movss (%r8,%r11,4),%xmm1
6830 243,67,15,16,20,144, //movss (%r8,%r10,4),%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006831 15,20,202, //unpcklps %xmm2,%xmm1
6832 15,20,203, //unpcklps %xmm3,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07006833 76,139,72,24, //mov 0x18(%rax),%r9
Mike Klein894d5612017-03-07 07:59:52 -05006834 102,65,15,112,210,78, //pshufd $0x4e,%xmm10,%xmm2
6835 102,72,15,126,209, //movq %xmm2,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07006836 68,15,183,193, //movzwl %cx,%r8d
Mike Klein894d5612017-03-07 07:59:52 -05006837 72,193,233,32, //shr $0x20,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07006838 102,76,15,126,208, //movq %xmm10,%rax
6839 68,15,183,208, //movzwl %ax,%r10d
6840 72,193,232,30, //shr $0x1e,%rax
6841 243,69,15,16,12,1, //movss (%r9,%rax,1),%xmm9
6842 243,65,15,16,20,137, //movss (%r9,%rcx,4),%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006843 68,15,20,202, //unpcklps %xmm2,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07006844 243,67,15,16,20,145, //movss (%r9,%r10,4),%xmm2
6845 243,67,15,16,28,129, //movss (%r9,%r8,4),%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006846 15,20,211, //unpcklps %xmm3,%xmm2
6847 65,15,20,209, //unpcklps %xmm9,%xmm2
6848 102,65,15,114,208,24, //psrld $0x18,%xmm8
6849 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05006850 184,129,128,128,59, //mov $0x3b808081,%eax
6851 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006852 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6853 65,15,89,216, //mulps %xmm8,%xmm3
6854 72,173, //lods %ds:(%rsi),%rax
6855 255,224, //jmpq *%rax
6856};
6857
6858CODE const uint8_t sk_load_a8_sse2[] = {
6859 72,173, //lods %ds:(%rsi),%rax
6860 72,139,0, //mov (%rax),%rax
6861 102,15,110,4,56, //movd (%rax,%rdi,1),%xmm0
6862 102,15,239,201, //pxor %xmm1,%xmm1
6863 102,15,96,193, //punpcklbw %xmm1,%xmm0
6864 102,15,97,193, //punpcklwd %xmm1,%xmm0
6865 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05006866 184,129,128,128,59, //mov $0x3b808081,%eax
6867 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006868 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6869 15,89,216, //mulps %xmm0,%xmm3
6870 72,173, //lods %ds:(%rsi),%rax
6871 15,87,192, //xorps %xmm0,%xmm0
6872 102,15,239,201, //pxor %xmm1,%xmm1
6873 15,87,210, //xorps %xmm2,%xmm2
6874 255,224, //jmpq *%rax
6875};
6876
6877CODE const uint8_t sk_store_a8_sse2[] = {
6878 72,173, //lods %ds:(%rsi),%rax
6879 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05006880 185,0,0,127,67, //mov $0x437f0000,%ecx
6881 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006882 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6883 68,15,89,195, //mulps %xmm3,%xmm8
6884 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
6885 102,65,15,114,240,16, //pslld $0x10,%xmm8
6886 102,65,15,114,224,16, //psrad $0x10,%xmm8
6887 102,69,15,107,192, //packssdw %xmm8,%xmm8
6888 102,69,15,103,192, //packuswb %xmm8,%xmm8
6889 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
6890 72,173, //lods %ds:(%rsi),%rax
6891 255,224, //jmpq *%rax
6892};
6893
6894CODE const uint8_t sk_load_565_sse2[] = {
6895 72,173, //lods %ds:(%rsi),%rax
6896 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05006897 243,15,126,20,120, //movq (%rax,%rdi,2),%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006898 102,15,239,192, //pxor %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05006899 102,15,97,208, //punpcklwd %xmm0,%xmm2
6900 184,0,248,0,0, //mov $0xf800,%eax
6901 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006902 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05006903 102,15,219,194, //pand %xmm2,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006904 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05006905 184,8,33,132,55, //mov $0x37842108,%eax
6906 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006907 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6908 15,89,193, //mulps %xmm1,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05006909 184,224,7,0,0, //mov $0x7e0,%eax
6910 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05006911 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05006912 102,15,219,202, //pand %xmm2,%xmm1
6913 15,91,217, //cvtdq2ps %xmm1,%xmm3
6914 184,33,8,2,58, //mov $0x3a020821,%eax
6915 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05006916 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05006917 15,89,203, //mulps %xmm3,%xmm1
6918 184,31,0,0,0, //mov $0x1f,%eax
6919 102,15,110,216, //movd %eax,%xmm3
6920 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
6921 102,15,219,218, //pand %xmm2,%xmm3
6922 15,91,219, //cvtdq2ps %xmm3,%xmm3
6923 184,8,33,4,61, //mov $0x3d042108,%eax
6924 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006925 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -05006926 15,89,211, //mulps %xmm3,%xmm2
6927 184,0,0,128,63, //mov $0x3f800000,%eax
6928 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006929 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6930 72,173, //lods %ds:(%rsi),%rax
6931 255,224, //jmpq *%rax
6932};
6933
6934CODE const uint8_t sk_store_565_sse2[] = {
6935 72,173, //lods %ds:(%rsi),%rax
6936 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05006937 185,0,0,248,65, //mov $0x41f80000,%ecx
6938 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006939 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05006940 69,15,40,200, //movaps %xmm8,%xmm9
6941 68,15,89,200, //mulps %xmm0,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006942 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -05006943 102,65,15,114,241,11, //pslld $0xb,%xmm9
6944 185,0,0,124,66, //mov $0x427c0000,%ecx
6945 102,68,15,110,209, //movd %ecx,%xmm10
6946 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6947 68,15,89,209, //mulps %xmm1,%xmm10
6948 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
6949 102,65,15,114,242,5, //pslld $0x5,%xmm10
6950 102,69,15,235,209, //por %xmm9,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05006951 68,15,89,194, //mulps %xmm2,%xmm8
6952 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05006953 102,69,15,86,194, //orpd %xmm10,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006954 102,65,15,114,240,16, //pslld $0x10,%xmm8
6955 102,65,15,114,224,16, //psrad $0x10,%xmm8
6956 102,69,15,107,192, //packssdw %xmm8,%xmm8
6957 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
6958 72,173, //lods %ds:(%rsi),%rax
6959 255,224, //jmpq *%rax
6960};
6961
6962CODE const uint8_t sk_load_8888_sse2[] = {
6963 72,173, //lods %ds:(%rsi),%rax
6964 72,139,0, //mov (%rax),%rax
6965 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006966 184,255,0,0,0, //mov $0xff,%eax
6967 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006968 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6969 102,15,111,203, //movdqa %xmm3,%xmm1
6970 102,15,114,209,8, //psrld $0x8,%xmm1
6971 102,15,219,200, //pand %xmm0,%xmm1
6972 102,15,111,211, //movdqa %xmm3,%xmm2
6973 102,15,114,210,16, //psrld $0x10,%xmm2
6974 102,15,219,208, //pand %xmm0,%xmm2
6975 102,15,219,195, //pand %xmm3,%xmm0
6976 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006977 184,129,128,128,59, //mov $0x3b808081,%eax
6978 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006979 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6980 65,15,89,192, //mulps %xmm8,%xmm0
6981 15,91,201, //cvtdq2ps %xmm1,%xmm1
6982 65,15,89,200, //mulps %xmm8,%xmm1
6983 15,91,210, //cvtdq2ps %xmm2,%xmm2
6984 65,15,89,208, //mulps %xmm8,%xmm2
6985 102,15,114,211,24, //psrld $0x18,%xmm3
6986 15,91,219, //cvtdq2ps %xmm3,%xmm3
6987 65,15,89,216, //mulps %xmm8,%xmm3
6988 72,173, //lods %ds:(%rsi),%rax
6989 255,224, //jmpq *%rax
6990};
6991
6992CODE const uint8_t sk_store_8888_sse2[] = {
6993 72,173, //lods %ds:(%rsi),%rax
6994 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006995 185,0,0,127,67, //mov $0x437f0000,%ecx
6996 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006997 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6998 69,15,40,200, //movaps %xmm8,%xmm9
6999 68,15,89,200, //mulps %xmm0,%xmm9
7000 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
7001 69,15,40,208, //movaps %xmm8,%xmm10
7002 68,15,89,209, //mulps %xmm1,%xmm10
7003 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
7004 102,65,15,114,242,8, //pslld $0x8,%xmm10
7005 102,69,15,235,209, //por %xmm9,%xmm10
7006 69,15,40,200, //movaps %xmm8,%xmm9
7007 68,15,89,202, //mulps %xmm2,%xmm9
7008 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
7009 102,65,15,114,241,16, //pslld $0x10,%xmm9
7010 68,15,89,195, //mulps %xmm3,%xmm8
7011 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
7012 102,65,15,114,240,24, //pslld $0x18,%xmm8
7013 102,69,15,235,193, //por %xmm9,%xmm8
7014 102,69,15,235,194, //por %xmm10,%xmm8
7015 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
7016 72,173, //lods %ds:(%rsi),%rax
7017 255,224, //jmpq *%rax
7018};
7019
7020CODE const uint8_t sk_load_f16_sse2[] = {
7021 72,173, //lods %ds:(%rsi),%rax
7022 72,139,0, //mov (%rax),%rax
7023 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
7024 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
7025 102,15,111,208, //movdqa %xmm0,%xmm2
7026 102,15,97,209, //punpcklwd %xmm1,%xmm2
7027 102,15,105,193, //punpckhwd %xmm1,%xmm0
7028 102,68,15,111,194, //movdqa %xmm2,%xmm8
7029 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
7030 102,15,105,208, //punpckhwd %xmm0,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -05007031 184,0,4,0,4, //mov $0x4000400,%eax
7032 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05007033 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
7034 102,15,111,203, //movdqa %xmm3,%xmm1
7035 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
7036 102,65,15,223,200, //pandn %xmm8,%xmm1
7037 102,15,101,218, //pcmpgtw %xmm2,%xmm3
7038 102,15,223,218, //pandn %xmm2,%xmm3
7039 102,69,15,239,192, //pxor %xmm8,%xmm8
7040 102,15,111,193, //movdqa %xmm1,%xmm0
7041 102,65,15,97,192, //punpcklwd %xmm8,%xmm0
7042 102,15,114,240,13, //pslld $0xd,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05007043 184,0,0,128,119, //mov $0x77800000,%eax
7044 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05007045 102,68,15,112,202,0, //pshufd $0x0,%xmm2,%xmm9
7046 65,15,89,193, //mulps %xmm9,%xmm0
7047 102,65,15,105,200, //punpckhwd %xmm8,%xmm1
7048 102,15,114,241,13, //pslld $0xd,%xmm1
7049 65,15,89,201, //mulps %xmm9,%xmm1
7050 102,15,111,211, //movdqa %xmm3,%xmm2
7051 102,65,15,97,208, //punpcklwd %xmm8,%xmm2
7052 102,15,114,242,13, //pslld $0xd,%xmm2
7053 65,15,89,209, //mulps %xmm9,%xmm2
7054 102,65,15,105,216, //punpckhwd %xmm8,%xmm3
7055 102,15,114,243,13, //pslld $0xd,%xmm3
7056 65,15,89,217, //mulps %xmm9,%xmm3
7057 72,173, //lods %ds:(%rsi),%rax
7058 255,224, //jmpq *%rax
7059};
7060
7061CODE const uint8_t sk_store_f16_sse2[] = {
7062 72,173, //lods %ds:(%rsi),%rax
7063 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -05007064 185,0,0,128,7, //mov $0x7800000,%ecx
7065 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05007066 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
7067 102,69,15,111,200, //movdqa %xmm8,%xmm9
7068 68,15,89,200, //mulps %xmm0,%xmm9
7069 102,65,15,114,209,13, //psrld $0xd,%xmm9
7070 102,69,15,111,208, //movdqa %xmm8,%xmm10
7071 68,15,89,209, //mulps %xmm1,%xmm10
7072 102,65,15,114,210,13, //psrld $0xd,%xmm10
7073 102,69,15,111,216, //movdqa %xmm8,%xmm11
7074 68,15,89,218, //mulps %xmm2,%xmm11
7075 102,65,15,114,211,13, //psrld $0xd,%xmm11
7076 68,15,89,195, //mulps %xmm3,%xmm8
7077 102,65,15,114,208,13, //psrld $0xd,%xmm8
7078 102,65,15,115,250,2, //pslldq $0x2,%xmm10
7079 102,69,15,235,209, //por %xmm9,%xmm10
7080 102,65,15,115,248,2, //pslldq $0x2,%xmm8
7081 102,69,15,235,195, //por %xmm11,%xmm8
7082 102,69,15,111,202, //movdqa %xmm10,%xmm9
7083 102,69,15,98,200, //punpckldq %xmm8,%xmm9
7084 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
7085 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
7086 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
7087 72,173, //lods %ds:(%rsi),%rax
7088 255,224, //jmpq *%rax
7089};
7090
7091CODE const uint8_t sk_store_f32_sse2[] = {
7092 72,173, //lods %ds:(%rsi),%rax
7093 72,139,0, //mov (%rax),%rax
7094 72,137,249, //mov %rdi,%rcx
7095 72,193,225,4, //shl $0x4,%rcx
7096 68,15,40,192, //movaps %xmm0,%xmm8
7097 68,15,40,200, //movaps %xmm0,%xmm9
7098 68,15,20,201, //unpcklps %xmm1,%xmm9
7099 68,15,40,210, //movaps %xmm2,%xmm10
7100 68,15,40,218, //movaps %xmm2,%xmm11
7101 68,15,20,219, //unpcklps %xmm3,%xmm11
7102 68,15,21,193, //unpckhps %xmm1,%xmm8
7103 68,15,21,211, //unpckhps %xmm3,%xmm10
7104 69,15,40,225, //movaps %xmm9,%xmm12
7105 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -07007106 69,15,18,217, //movhlps %xmm9,%xmm11
7107 69,15,40,200, //movaps %xmm8,%xmm9
7108 102,69,15,20,202, //unpcklpd %xmm10,%xmm9
7109 69,15,18,208, //movhlps %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05007110 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
Mike Klein64b97482017-03-14 17:35:04 -07007111 68,15,17,92,8,16, //movups %xmm11,0x10(%rax,%rcx,1)
7112 102,68,15,17,76,8,32, //movupd %xmm9,0x20(%rax,%rcx,1)
7113 68,15,17,84,8,48, //movups %xmm10,0x30(%rax,%rcx,1)
Mike Klein894d5612017-03-07 07:59:52 -05007114 72,173, //lods %ds:(%rsi),%rax
7115 255,224, //jmpq *%rax
7116};
7117
7118CODE const uint8_t sk_clamp_x_sse2[] = {
7119 72,173, //lods %ds:(%rsi),%rax
7120 69,15,87,192, //xorps %xmm8,%xmm8
7121 68,15,95,192, //maxps %xmm0,%xmm8
7122 243,68,15,16,8, //movss (%rax),%xmm9
7123 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7124 102,15,118,192, //pcmpeqd %xmm0,%xmm0
7125 102,65,15,254,193, //paddd %xmm9,%xmm0
7126 68,15,93,192, //minps %xmm0,%xmm8
7127 72,173, //lods %ds:(%rsi),%rax
7128 65,15,40,192, //movaps %xmm8,%xmm0
7129 255,224, //jmpq *%rax
7130};
7131
7132CODE const uint8_t sk_clamp_y_sse2[] = {
7133 72,173, //lods %ds:(%rsi),%rax
7134 69,15,87,192, //xorps %xmm8,%xmm8
7135 68,15,95,193, //maxps %xmm1,%xmm8
7136 243,68,15,16,8, //movss (%rax),%xmm9
7137 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7138 102,15,118,201, //pcmpeqd %xmm1,%xmm1
7139 102,65,15,254,201, //paddd %xmm9,%xmm1
7140 68,15,93,193, //minps %xmm1,%xmm8
7141 72,173, //lods %ds:(%rsi),%rax
7142 65,15,40,200, //movaps %xmm8,%xmm1
7143 255,224, //jmpq *%rax
7144};
7145
7146CODE const uint8_t sk_repeat_x_sse2[] = {
7147 72,173, //lods %ds:(%rsi),%rax
7148 243,68,15,16,0, //movss (%rax),%xmm8
7149 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
7150 68,15,40,200, //movaps %xmm0,%xmm9
7151 69,15,94,200, //divps %xmm8,%xmm9
7152 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
7153 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
7154 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007155 184,0,0,128,63, //mov $0x3f800000,%eax
7156 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05007157 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7158 69,15,84,217, //andps %xmm9,%xmm11
7159 69,15,92,211, //subps %xmm11,%xmm10
7160 69,15,89,208, //mulps %xmm8,%xmm10
7161 65,15,92,194, //subps %xmm10,%xmm0
7162 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
7163 102,69,15,254,200, //paddd %xmm8,%xmm9
7164 65,15,93,193, //minps %xmm9,%xmm0
7165 72,173, //lods %ds:(%rsi),%rax
7166 255,224, //jmpq *%rax
7167};
7168
7169CODE const uint8_t sk_repeat_y_sse2[] = {
7170 72,173, //lods %ds:(%rsi),%rax
7171 243,68,15,16,0, //movss (%rax),%xmm8
7172 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
7173 68,15,40,201, //movaps %xmm1,%xmm9
7174 69,15,94,200, //divps %xmm8,%xmm9
7175 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
7176 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
7177 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007178 184,0,0,128,63, //mov $0x3f800000,%eax
7179 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05007180 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7181 69,15,84,217, //andps %xmm9,%xmm11
7182 69,15,92,211, //subps %xmm11,%xmm10
7183 69,15,89,208, //mulps %xmm8,%xmm10
7184 65,15,92,202, //subps %xmm10,%xmm1
7185 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
7186 102,69,15,254,200, //paddd %xmm8,%xmm9
7187 65,15,93,201, //minps %xmm9,%xmm1
7188 72,173, //lods %ds:(%rsi),%rax
7189 255,224, //jmpq *%rax
7190};
7191
7192CODE const uint8_t sk_mirror_x_sse2[] = {
7193 72,173, //lods %ds:(%rsi),%rax
7194 243,68,15,16,8, //movss (%rax),%xmm9
7195 69,15,40,193, //movaps %xmm9,%xmm8
7196 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
7197 65,15,92,192, //subps %xmm8,%xmm0
7198 243,69,15,88,201, //addss %xmm9,%xmm9
7199 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7200 68,15,40,208, //movaps %xmm0,%xmm10
7201 69,15,94,209, //divps %xmm9,%xmm10
7202 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
7203 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
7204 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007205 184,0,0,128,63, //mov $0x3f800000,%eax
7206 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05007207 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7208 69,15,84,226, //andps %xmm10,%xmm12
7209 69,15,87,210, //xorps %xmm10,%xmm10
7210 69,15,92,220, //subps %xmm12,%xmm11
7211 69,15,89,217, //mulps %xmm9,%xmm11
7212 65,15,92,195, //subps %xmm11,%xmm0
7213 65,15,92,192, //subps %xmm8,%xmm0
7214 68,15,92,208, //subps %xmm0,%xmm10
7215 65,15,84,194, //andps %xmm10,%xmm0
7216 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
7217 102,69,15,254,200, //paddd %xmm8,%xmm9
7218 65,15,93,193, //minps %xmm9,%xmm0
7219 72,173, //lods %ds:(%rsi),%rax
7220 255,224, //jmpq *%rax
7221};
7222
7223CODE const uint8_t sk_mirror_y_sse2[] = {
7224 72,173, //lods %ds:(%rsi),%rax
7225 243,68,15,16,8, //movss (%rax),%xmm9
7226 69,15,40,193, //movaps %xmm9,%xmm8
7227 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
7228 65,15,92,200, //subps %xmm8,%xmm1
7229 243,69,15,88,201, //addss %xmm9,%xmm9
7230 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7231 68,15,40,209, //movaps %xmm1,%xmm10
7232 69,15,94,209, //divps %xmm9,%xmm10
7233 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
7234 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
7235 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007236 184,0,0,128,63, //mov $0x3f800000,%eax
7237 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05007238 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7239 69,15,84,226, //andps %xmm10,%xmm12
7240 69,15,87,210, //xorps %xmm10,%xmm10
7241 69,15,92,220, //subps %xmm12,%xmm11
7242 69,15,89,217, //mulps %xmm9,%xmm11
7243 65,15,92,203, //subps %xmm11,%xmm1
7244 65,15,92,200, //subps %xmm8,%xmm1
7245 68,15,92,209, //subps %xmm1,%xmm10
7246 65,15,84,202, //andps %xmm10,%xmm1
7247 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
7248 102,69,15,254,200, //paddd %xmm8,%xmm9
7249 65,15,93,201, //minps %xmm9,%xmm1
7250 72,173, //lods %ds:(%rsi),%rax
7251 255,224, //jmpq *%rax
7252};
7253
Mike Kleine9ed07d2017-03-07 12:28:11 -05007254CODE const uint8_t sk_luminance_to_alpha_sse2[] = {
Mike Klein5224f462017-03-07 17:29:54 -05007255 184,208,179,89,62, //mov $0x3e59b3d0,%eax
7256 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05007257 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
7258 15,89,216, //mulps %xmm0,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05007259 184,89,23,55,63, //mov $0x3f371759,%eax
7260 102,15,110,192, //movd %eax,%xmm0
7261 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7262 15,89,193, //mulps %xmm1,%xmm0
7263 15,88,195, //addps %xmm3,%xmm0
7264 184,152,221,147,61, //mov $0x3d93dd98,%eax
7265 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05007266 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
7267 15,89,218, //mulps %xmm2,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05007268 15,88,216, //addps %xmm0,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05007269 72,173, //lods %ds:(%rsi),%rax
7270 15,87,192, //xorps %xmm0,%xmm0
7271 15,87,201, //xorps %xmm1,%xmm1
7272 15,87,210, //xorps %xmm2,%xmm2
7273 255,224, //jmpq *%rax
7274};
7275
Mike Klein894d5612017-03-07 07:59:52 -05007276CODE const uint8_t sk_matrix_2x3_sse2[] = {
7277 68,15,40,201, //movaps %xmm1,%xmm9
7278 68,15,40,192, //movaps %xmm0,%xmm8
7279 72,173, //lods %ds:(%rsi),%rax
7280 243,15,16,0, //movss (%rax),%xmm0
7281 243,15,16,72,4, //movss 0x4(%rax),%xmm1
7282 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7283 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7284 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7285 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
7286 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7287 69,15,89,209, //mulps %xmm9,%xmm10
7288 69,15,88,211, //addps %xmm11,%xmm10
7289 65,15,89,192, //mulps %xmm8,%xmm0
7290 65,15,88,194, //addps %xmm10,%xmm0
7291 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7292 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
7293 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7294 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
7295 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7296 69,15,89,209, //mulps %xmm9,%xmm10
7297 69,15,88,211, //addps %xmm11,%xmm10
7298 65,15,89,200, //mulps %xmm8,%xmm1
7299 65,15,88,202, //addps %xmm10,%xmm1
7300 72,173, //lods %ds:(%rsi),%rax
7301 255,224, //jmpq *%rax
7302};
7303
7304CODE const uint8_t sk_matrix_3x4_sse2[] = {
7305 68,15,40,201, //movaps %xmm1,%xmm9
7306 68,15,40,192, //movaps %xmm0,%xmm8
7307 72,173, //lods %ds:(%rsi),%rax
7308 243,15,16,0, //movss (%rax),%xmm0
7309 243,15,16,72,4, //movss 0x4(%rax),%xmm1
7310 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7311 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
7312 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7313 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
7314 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7315 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
7316 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7317 68,15,89,218, //mulps %xmm2,%xmm11
7318 69,15,88,220, //addps %xmm12,%xmm11
7319 69,15,89,209, //mulps %xmm9,%xmm10
7320 69,15,88,211, //addps %xmm11,%xmm10
7321 65,15,89,192, //mulps %xmm8,%xmm0
7322 65,15,88,194, //addps %xmm10,%xmm0
7323 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7324 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
7325 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7326 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
7327 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7328 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
7329 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7330 68,15,89,218, //mulps %xmm2,%xmm11
7331 69,15,88,220, //addps %xmm12,%xmm11
7332 69,15,89,209, //mulps %xmm9,%xmm10
7333 69,15,88,211, //addps %xmm11,%xmm10
7334 65,15,89,200, //mulps %xmm8,%xmm1
7335 65,15,88,202, //addps %xmm10,%xmm1
7336 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7337 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7338 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
7339 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7340 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
7341 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7342 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
7343 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7344 68,15,89,226, //mulps %xmm2,%xmm12
7345 69,15,88,229, //addps %xmm13,%xmm12
7346 69,15,89,217, //mulps %xmm9,%xmm11
7347 69,15,88,220, //addps %xmm12,%xmm11
7348 69,15,89,208, //mulps %xmm8,%xmm10
7349 69,15,88,211, //addps %xmm11,%xmm10
7350 72,173, //lods %ds:(%rsi),%rax
7351 65,15,40,210, //movaps %xmm10,%xmm2
7352 255,224, //jmpq *%rax
7353};
7354
Mike Kleine9ed07d2017-03-07 12:28:11 -05007355CODE const uint8_t sk_matrix_4x5_sse2[] = {
7356 68,15,40,201, //movaps %xmm1,%xmm9
7357 68,15,40,192, //movaps %xmm0,%xmm8
7358 72,173, //lods %ds:(%rsi),%rax
7359 243,15,16,0, //movss (%rax),%xmm0
7360 243,15,16,72,4, //movss 0x4(%rax),%xmm1
7361 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7362 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
7363 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7364 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
7365 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7366 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
7367 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7368 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
7369 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7370 68,15,89,227, //mulps %xmm3,%xmm12
7371 69,15,88,229, //addps %xmm13,%xmm12
7372 68,15,89,218, //mulps %xmm2,%xmm11
7373 69,15,88,220, //addps %xmm12,%xmm11
7374 69,15,89,209, //mulps %xmm9,%xmm10
7375 69,15,88,211, //addps %xmm11,%xmm10
7376 65,15,89,192, //mulps %xmm8,%xmm0
7377 65,15,88,194, //addps %xmm10,%xmm0
7378 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7379 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
7380 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7381 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
7382 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7383 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
7384 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7385 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
7386 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7387 68,15,89,227, //mulps %xmm3,%xmm12
7388 69,15,88,229, //addps %xmm13,%xmm12
7389 68,15,89,218, //mulps %xmm2,%xmm11
7390 69,15,88,220, //addps %xmm12,%xmm11
7391 69,15,89,209, //mulps %xmm9,%xmm10
7392 69,15,88,211, //addps %xmm11,%xmm10
7393 65,15,89,200, //mulps %xmm8,%xmm1
7394 65,15,88,202, //addps %xmm10,%xmm1
7395 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7396 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7397 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
7398 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7399 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
7400 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7401 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
7402 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7403 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
7404 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
7405 68,15,89,235, //mulps %xmm3,%xmm13
7406 69,15,88,238, //addps %xmm14,%xmm13
7407 68,15,89,226, //mulps %xmm2,%xmm12
7408 69,15,88,229, //addps %xmm13,%xmm12
7409 69,15,89,217, //mulps %xmm9,%xmm11
7410 69,15,88,220, //addps %xmm12,%xmm11
7411 69,15,89,208, //mulps %xmm8,%xmm10
7412 69,15,88,211, //addps %xmm11,%xmm10
7413 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
7414 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7415 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
7416 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7417 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
7418 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7419 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
7420 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
7421 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
7422 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
7423 68,15,89,243, //mulps %xmm3,%xmm14
7424 69,15,88,247, //addps %xmm15,%xmm14
7425 68,15,89,234, //mulps %xmm2,%xmm13
7426 69,15,88,238, //addps %xmm14,%xmm13
7427 69,15,89,225, //mulps %xmm9,%xmm12
7428 69,15,88,229, //addps %xmm13,%xmm12
7429 69,15,89,216, //mulps %xmm8,%xmm11
7430 69,15,88,220, //addps %xmm12,%xmm11
7431 72,173, //lods %ds:(%rsi),%rax
7432 65,15,40,210, //movaps %xmm10,%xmm2
7433 65,15,40,219, //movaps %xmm11,%xmm3
7434 255,224, //jmpq *%rax
7435};
7436
Mike Klein894d5612017-03-07 07:59:52 -05007437CODE const uint8_t sk_matrix_perspective_sse2[] = {
7438 68,15,40,192, //movaps %xmm0,%xmm8
7439 72,173, //lods %ds:(%rsi),%rax
7440 243,15,16,0, //movss (%rax),%xmm0
7441 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
7442 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7443 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7444 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7445 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7446 68,15,89,201, //mulps %xmm1,%xmm9
7447 69,15,88,202, //addps %xmm10,%xmm9
7448 65,15,89,192, //mulps %xmm8,%xmm0
7449 65,15,88,193, //addps %xmm9,%xmm0
7450 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
7451 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7452 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
7453 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7454 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
7455 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7456 68,15,89,209, //mulps %xmm1,%xmm10
7457 69,15,88,211, //addps %xmm11,%xmm10
7458 69,15,89,200, //mulps %xmm8,%xmm9
7459 69,15,88,202, //addps %xmm10,%xmm9
7460 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
7461 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7462 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
7463 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7464 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
7465 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7466 68,15,89,217, //mulps %xmm1,%xmm11
7467 69,15,88,220, //addps %xmm12,%xmm11
7468 69,15,89,208, //mulps %xmm8,%xmm10
7469 69,15,88,211, //addps %xmm11,%xmm10
7470 65,15,83,202, //rcpps %xmm10,%xmm1
7471 15,89,193, //mulps %xmm1,%xmm0
7472 68,15,89,201, //mulps %xmm1,%xmm9
7473 72,173, //lods %ds:(%rsi),%rax
7474 65,15,40,201, //movaps %xmm9,%xmm1
7475 255,224, //jmpq *%rax
7476};
7477
7478CODE const uint8_t sk_linear_gradient_2stops_sse2[] = {
7479 72,173, //lods %ds:(%rsi),%rax
7480 68,15,16,8, //movups (%rax),%xmm9
7481 15,16,88,16, //movups 0x10(%rax),%xmm3
7482 68,15,40,195, //movaps %xmm3,%xmm8
7483 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
7484 65,15,40,201, //movaps %xmm9,%xmm1
7485 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7486 68,15,89,192, //mulps %xmm0,%xmm8
7487 68,15,88,193, //addps %xmm1,%xmm8
7488 15,40,203, //movaps %xmm3,%xmm1
7489 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
7490 65,15,40,209, //movaps %xmm9,%xmm2
7491 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
7492 15,89,200, //mulps %xmm0,%xmm1
7493 15,88,202, //addps %xmm2,%xmm1
7494 15,40,211, //movaps %xmm3,%xmm2
7495 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
7496 69,15,40,209, //movaps %xmm9,%xmm10
7497 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
7498 15,89,208, //mulps %xmm0,%xmm2
7499 65,15,88,210, //addps %xmm10,%xmm2
7500 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
7501 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
7502 15,89,216, //mulps %xmm0,%xmm3
7503 65,15,88,217, //addps %xmm9,%xmm3
7504 72,173, //lods %ds:(%rsi),%rax
7505 65,15,40,192, //movaps %xmm8,%xmm0
7506 255,224, //jmpq *%rax
7507};
7508#elif defined(_M_X64)
7509
7510CODE const uint8_t sk_start_pipeline_hsw[] = {
7511 65,87, //push %r15
7512 65,86, //push %r14
7513 65,85, //push %r13
7514 65,84, //push %r12
7515 86, //push %rsi
7516 87, //push %rdi
7517 83, //push %rbx
7518 72,129,236,160,0,0,0, //sub $0xa0,%rsp
7519 197,120,41,188,36,144,0,0,0, //vmovaps %xmm15,0x90(%rsp)
7520 197,120,41,180,36,128,0,0,0, //vmovaps %xmm14,0x80(%rsp)
7521 197,120,41,108,36,112, //vmovaps %xmm13,0x70(%rsp)
7522 197,120,41,100,36,96, //vmovaps %xmm12,0x60(%rsp)
7523 197,120,41,92,36,80, //vmovaps %xmm11,0x50(%rsp)
7524 197,120,41,84,36,64, //vmovaps %xmm10,0x40(%rsp)
7525 197,120,41,76,36,48, //vmovaps %xmm9,0x30(%rsp)
7526 197,120,41,68,36,32, //vmovaps %xmm8,0x20(%rsp)
7527 197,248,41,124,36,16, //vmovaps %xmm7,0x10(%rsp)
7528 197,248,41,52,36, //vmovaps %xmm6,(%rsp)
7529 77,137,205, //mov %r9,%r13
7530 77,137,198, //mov %r8,%r14
7531 72,137,203, //mov %rcx,%rbx
7532 72,137,214, //mov %rdx,%rsi
7533 72,173, //lods %ds:(%rsi),%rax
7534 73,137,199, //mov %rax,%r15
7535 73,137,244, //mov %rsi,%r12
7536 72,141,67,8, //lea 0x8(%rbx),%rax
7537 76,57,232, //cmp %r13,%rax
7538 118,5, //jbe 75 <_sk_start_pipeline_hsw+0x75>
7539 72,137,223, //mov %rbx,%rdi
7540 235,65, //jmp b6 <_sk_start_pipeline_hsw+0xb6>
7541 185,0,0,0,0, //mov $0x0,%ecx
7542 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7543 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7544 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7545 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7546 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
7547 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
7548 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
7549 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
7550 72,137,223, //mov %rbx,%rdi
7551 76,137,230, //mov %r12,%rsi
7552 76,137,242, //mov %r14,%rdx
7553 65,255,215, //callq *%r15
7554 72,141,123,8, //lea 0x8(%rbx),%rdi
7555 72,131,195,16, //add $0x10,%rbx
7556 76,57,235, //cmp %r13,%rbx
7557 72,137,251, //mov %rdi,%rbx
7558 118,191, //jbe 75 <_sk_start_pipeline_hsw+0x75>
7559 76,137,233, //mov %r13,%rcx
7560 72,41,249, //sub %rdi,%rcx
7561 116,41, //je e7 <_sk_start_pipeline_hsw+0xe7>
7562 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7563 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7564 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7565 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7566 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
7567 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
7568 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
7569 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
7570 76,137,230, //mov %r12,%rsi
7571 76,137,242, //mov %r14,%rdx
7572 65,255,215, //callq *%r15
7573 76,137,232, //mov %r13,%rax
7574 197,248,40,52,36, //vmovaps (%rsp),%xmm6
7575 197,248,40,124,36,16, //vmovaps 0x10(%rsp),%xmm7
7576 197,120,40,68,36,32, //vmovaps 0x20(%rsp),%xmm8
7577 197,120,40,76,36,48, //vmovaps 0x30(%rsp),%xmm9
7578 197,120,40,84,36,64, //vmovaps 0x40(%rsp),%xmm10
7579 197,120,40,92,36,80, //vmovaps 0x50(%rsp),%xmm11
7580 197,120,40,100,36,96, //vmovaps 0x60(%rsp),%xmm12
7581 197,120,40,108,36,112, //vmovaps 0x70(%rsp),%xmm13
7582 197,120,40,180,36,128,0,0,0, //vmovaps 0x80(%rsp),%xmm14
7583 197,120,40,188,36,144,0,0,0, //vmovaps 0x90(%rsp),%xmm15
7584 72,129,196,160,0,0,0, //add $0xa0,%rsp
7585 91, //pop %rbx
7586 95, //pop %rdi
7587 94, //pop %rsi
7588 65,92, //pop %r12
7589 65,93, //pop %r13
7590 65,94, //pop %r14
7591 65,95, //pop %r15
7592 197,248,119, //vzeroupper
7593 195, //retq
7594};
7595
7596CODE const uint8_t sk_just_return_hsw[] = {
7597 195, //retq
7598};
7599
7600CODE const uint8_t sk_seed_shader_hsw[] = {
7601 72,173, //lods %ds:(%rsi),%rax
7602 197,249,110,199, //vmovd %edi,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07007603 196,226,125,88,192, //vpbroadcastd %xmm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05007604 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007605 65,184,0,0,0,63, //mov $0x3f000000,%r8d
7606 196,193,121,110,200, //vmovd %r8d,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07007607 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05007608 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05007609 197,252,88,2, //vaddps (%rdx),%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05007610 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
7611 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
7612 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007613 184,0,0,128,63, //mov $0x3f800000,%eax
7614 197,249,110,208, //vmovd %eax,%xmm2
Mike Klein64b97482017-03-14 17:35:04 -07007615 196,226,125,88,210, //vpbroadcastd %xmm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05007616 72,173, //lods %ds:(%rsi),%rax
7617 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7618 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
7619 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
7620 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
7621 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
7622 255,224, //jmpq *%rax
7623};
7624
7625CODE const uint8_t sk_constant_color_hsw[] = {
7626 72,173, //lods %ds:(%rsi),%rax
7627 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
7628 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
7629 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
7630 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
7631 72,173, //lods %ds:(%rsi),%rax
7632 255,224, //jmpq *%rax
7633};
7634
7635CODE const uint8_t sk_clear_hsw[] = {
7636 72,173, //lods %ds:(%rsi),%rax
7637 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7638 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7639 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7640 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7641 255,224, //jmpq *%rax
7642};
7643
7644CODE const uint8_t sk_plus__hsw[] = {
7645 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
7646 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
7647 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
7648 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
7649 72,173, //lods %ds:(%rsi),%rax
7650 255,224, //jmpq *%rax
7651};
7652
7653CODE const uint8_t sk_srcover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007654 184,0,0,128,63, //mov $0x3f800000,%eax
7655 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07007656 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007657 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
7658 196,194,93,184,192, //vfmadd231ps %ymm8,%ymm4,%ymm0
7659 196,194,85,184,200, //vfmadd231ps %ymm8,%ymm5,%ymm1
7660 196,194,77,184,208, //vfmadd231ps %ymm8,%ymm6,%ymm2
7661 196,194,69,184,216, //vfmadd231ps %ymm8,%ymm7,%ymm3
7662 72,173, //lods %ds:(%rsi),%rax
7663 255,224, //jmpq *%rax
7664};
7665
7666CODE const uint8_t sk_dstover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007667 184,0,0,128,63, //mov $0x3f800000,%eax
7668 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07007669 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007670 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
7671 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7672 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
7673 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
7674 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
7675 72,173, //lods %ds:(%rsi),%rax
7676 255,224, //jmpq *%rax
7677};
7678
7679CODE const uint8_t sk_clamp_0_hsw[] = {
7680 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
7681 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
7682 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
7683 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
7684 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
7685 72,173, //lods %ds:(%rsi),%rax
7686 255,224, //jmpq *%rax
7687};
7688
7689CODE const uint8_t sk_clamp_1_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007690 184,0,0,128,63, //mov $0x3f800000,%eax
7691 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07007692 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007693 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
7694 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
7695 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
7696 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
7697 72,173, //lods %ds:(%rsi),%rax
7698 255,224, //jmpq *%rax
7699};
7700
7701CODE const uint8_t sk_clamp_a_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007702 184,0,0,128,63, //mov $0x3f800000,%eax
7703 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07007704 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007705 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
7706 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
7707 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
7708 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
7709 72,173, //lods %ds:(%rsi),%rax
7710 255,224, //jmpq *%rax
7711};
7712
7713CODE const uint8_t sk_set_rgb_hsw[] = {
7714 72,173, //lods %ds:(%rsi),%rax
7715 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
7716 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
7717 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
7718 72,173, //lods %ds:(%rsi),%rax
7719 255,224, //jmpq *%rax
7720};
7721
7722CODE const uint8_t sk_swap_rb_hsw[] = {
7723 197,124,40,192, //vmovaps %ymm0,%ymm8
7724 72,173, //lods %ds:(%rsi),%rax
7725 197,252,40,194, //vmovaps %ymm2,%ymm0
7726 197,124,41,194, //vmovaps %ymm8,%ymm2
7727 255,224, //jmpq *%rax
7728};
7729
7730CODE const uint8_t sk_swap_hsw[] = {
7731 197,124,40,195, //vmovaps %ymm3,%ymm8
7732 197,124,40,202, //vmovaps %ymm2,%ymm9
7733 197,124,40,209, //vmovaps %ymm1,%ymm10
7734 197,124,40,216, //vmovaps %ymm0,%ymm11
7735 72,173, //lods %ds:(%rsi),%rax
7736 197,252,40,196, //vmovaps %ymm4,%ymm0
7737 197,252,40,205, //vmovaps %ymm5,%ymm1
7738 197,252,40,214, //vmovaps %ymm6,%ymm2
7739 197,252,40,223, //vmovaps %ymm7,%ymm3
7740 197,124,41,220, //vmovaps %ymm11,%ymm4
7741 197,124,41,213, //vmovaps %ymm10,%ymm5
7742 197,124,41,206, //vmovaps %ymm9,%ymm6
7743 197,124,41,199, //vmovaps %ymm8,%ymm7
7744 255,224, //jmpq *%rax
7745};
7746
7747CODE const uint8_t sk_move_src_dst_hsw[] = {
7748 72,173, //lods %ds:(%rsi),%rax
7749 197,252,40,224, //vmovaps %ymm0,%ymm4
7750 197,252,40,233, //vmovaps %ymm1,%ymm5
7751 197,252,40,242, //vmovaps %ymm2,%ymm6
7752 197,252,40,251, //vmovaps %ymm3,%ymm7
7753 255,224, //jmpq *%rax
7754};
7755
7756CODE const uint8_t sk_move_dst_src_hsw[] = {
7757 72,173, //lods %ds:(%rsi),%rax
7758 197,252,40,196, //vmovaps %ymm4,%ymm0
7759 197,252,40,205, //vmovaps %ymm5,%ymm1
7760 197,252,40,214, //vmovaps %ymm6,%ymm2
7761 197,252,40,223, //vmovaps %ymm7,%ymm3
7762 255,224, //jmpq *%rax
7763};
7764
7765CODE const uint8_t sk_premul_hsw[] = {
7766 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
7767 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
7768 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
7769 72,173, //lods %ds:(%rsi),%rax
7770 255,224, //jmpq *%rax
7771};
7772
7773CODE const uint8_t sk_unpremul_hsw[] = {
7774 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
7775 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007776 184,0,0,128,63, //mov $0x3f800000,%eax
7777 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07007778 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05007779 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
7780 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
7781 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
7782 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
7783 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
7784 72,173, //lods %ds:(%rsi),%rax
7785 255,224, //jmpq *%rax
7786};
7787
7788CODE const uint8_t sk_from_srgb_hsw[] = {
Mike Klein5224f462017-03-07 17:29:54 -05007789 184,145,131,158,61, //mov $0x3d9e8391,%eax
7790 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07007791 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007792 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
7793 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05007794 184,154,153,153,62, //mov $0x3e99999a,%eax
7795 197,121,110,216, //vmovd %eax,%xmm11
Mike Klein64b97482017-03-14 17:35:04 -07007796 196,66,125,88,219, //vpbroadcastd %xmm11,%ymm11
Mike Klein5224f462017-03-07 17:29:54 -05007797 184,92,143,50,63, //mov $0x3f328f5c,%eax
7798 197,121,110,224, //vmovd %eax,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -07007799 196,66,125,88,228, //vpbroadcastd %xmm12,%ymm12
7800 196,65,125,111,235, //vmovdqa %ymm11,%ymm13
Mike Klein894d5612017-03-07 07:59:52 -05007801 196,66,125,168,236, //vfmadd213ps %ymm12,%ymm0,%ymm13
Mike Klein5224f462017-03-07 17:29:54 -05007802 184,10,215,35,59, //mov $0x3b23d70a,%eax
7803 197,121,110,240, //vmovd %eax,%xmm14
Mike Klein64b97482017-03-14 17:35:04 -07007804 196,66,125,88,246, //vpbroadcastd %xmm14,%ymm14
Mike Klein894d5612017-03-07 07:59:52 -05007805 196,66,45,168,238, //vfmadd213ps %ymm14,%ymm10,%ymm13
Mike Klein5224f462017-03-07 17:29:54 -05007806 184,174,71,97,61, //mov $0x3d6147ae,%eax
7807 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07007808 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05007809 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
7810 196,195,21,74,193,0, //vblendvps %ymm0,%ymm9,%ymm13,%ymm0
7811 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
7812 197,116,89,233, //vmulps %ymm1,%ymm1,%ymm13
Mike Klein64b97482017-03-14 17:35:04 -07007813 196,65,125,111,251, //vmovdqa %ymm11,%ymm15
Mike Klein894d5612017-03-07 07:59:52 -05007814 196,66,117,168,252, //vfmadd213ps %ymm12,%ymm1,%ymm15
7815 196,66,21,168,254, //vfmadd213ps %ymm14,%ymm13,%ymm15
7816 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
7817 196,195,5,74,201,16, //vblendvps %ymm1,%ymm9,%ymm15,%ymm1
7818 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
7819 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
7820 196,66,109,168,220, //vfmadd213ps %ymm12,%ymm2,%ymm11
7821 196,66,53,168,222, //vfmadd213ps %ymm14,%ymm9,%ymm11
7822 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
7823 196,195,37,74,208,32, //vblendvps %ymm2,%ymm8,%ymm11,%ymm2
7824 72,173, //lods %ds:(%rsi),%rax
7825 255,224, //jmpq *%rax
7826};
7827
7828CODE const uint8_t sk_to_srgb_hsw[] = {
7829 197,124,82,192, //vrsqrtps %ymm0,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05007830 196,65,124,83,216, //vrcpps %ymm8,%ymm11
7831 196,65,124,82,224, //vrsqrtps %ymm8,%ymm12
7832 184,41,92,71,65, //mov $0x41475c29,%eax
7833 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07007834 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05007835 197,60,89,232, //vmulps %ymm0,%ymm8,%ymm13
7836 184,0,0,128,63, //mov $0x3f800000,%eax
7837 197,121,110,200, //vmovd %eax,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07007838 196,66,125,88,201, //vpbroadcastd %xmm9,%ymm9
Mike Klein5224f462017-03-07 17:29:54 -05007839 184,194,135,210,62, //mov $0x3ed287c2,%eax
7840 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07007841 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05007842 184,206,111,48,63, //mov $0x3f306fce,%eax
7843 197,121,110,240, //vmovd %eax,%xmm14
Mike Klein64b97482017-03-14 17:35:04 -07007844 196,66,125,88,246, //vpbroadcastd %xmm14,%ymm14
Mike Klein5224f462017-03-07 17:29:54 -05007845 184,168,87,202,61, //mov $0x3dca57a8,%eax
7846 53,0,0,0,128, //xor $0x80000000,%eax
7847 197,121,110,248, //vmovd %eax,%xmm15
Mike Klein64b97482017-03-14 17:35:04 -07007848 196,66,125,88,255, //vpbroadcastd %xmm15,%ymm15
Mike Klein894d5612017-03-07 07:59:52 -05007849 196,66,13,168,223, //vfmadd213ps %ymm15,%ymm14,%ymm11
Mike Klein5224f462017-03-07 17:29:54 -05007850 196,66,45,184,220, //vfmadd231ps %ymm12,%ymm10,%ymm11
7851 196,65,52,93,219, //vminps %ymm11,%ymm9,%ymm11
7852 184,4,231,140,59, //mov $0x3b8ce704,%eax
7853 197,121,110,224, //vmovd %eax,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -07007854 196,66,125,88,228, //vpbroadcastd %xmm12,%ymm12
Mike Klein5224f462017-03-07 17:29:54 -05007855 196,193,124,194,196,1, //vcmpltps %ymm12,%ymm0,%ymm0
7856 196,195,37,74,197,0, //vblendvps %ymm0,%ymm13,%ymm11,%ymm0
7857 197,124,82,217, //vrsqrtps %ymm1,%ymm11
7858 196,65,124,83,235, //vrcpps %ymm11,%ymm13
7859 196,65,124,82,219, //vrsqrtps %ymm11,%ymm11
7860 196,66,13,168,239, //vfmadd213ps %ymm15,%ymm14,%ymm13
7861 196,66,45,184,235, //vfmadd231ps %ymm11,%ymm10,%ymm13
7862 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
7863 196,65,52,93,237, //vminps %ymm13,%ymm9,%ymm13
7864 196,193,116,194,204,1, //vcmpltps %ymm12,%ymm1,%ymm1
7865 196,195,21,74,203,16, //vblendvps %ymm1,%ymm11,%ymm13,%ymm1
7866 197,124,82,218, //vrsqrtps %ymm2,%ymm11
7867 196,65,124,83,235, //vrcpps %ymm11,%ymm13
7868 196,66,13,168,239, //vfmadd213ps %ymm15,%ymm14,%ymm13
7869 196,65,124,82,219, //vrsqrtps %ymm11,%ymm11
7870 196,66,45,184,235, //vfmadd231ps %ymm11,%ymm10,%ymm13
7871 196,65,52,93,205, //vminps %ymm13,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05007872 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05007873 196,193,108,194,212,1, //vcmpltps %ymm12,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05007874 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
7875 72,173, //lods %ds:(%rsi),%rax
7876 255,224, //jmpq *%rax
7877};
7878
7879CODE const uint8_t sk_scale_1_float_hsw[] = {
7880 72,173, //lods %ds:(%rsi),%rax
7881 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
7882 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
7883 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
7884 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
7885 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
7886 72,173, //lods %ds:(%rsi),%rax
7887 255,224, //jmpq *%rax
7888};
7889
7890CODE const uint8_t sk_scale_u8_hsw[] = {
7891 73,137,200, //mov %rcx,%r8
7892 72,173, //lods %ds:(%rsi),%rax
7893 72,139,0, //mov (%rax),%rax
7894 72,1,248, //add %rdi,%rax
7895 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05007896 117,56, //jne 556 <_sk_scale_u8_hsw+0x48>
Mike Klein64b97482017-03-14 17:35:04 -07007897 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05007898 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
7899 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007900 184,129,128,128,59, //mov $0x3b808081,%eax
7901 197,121,110,200, //vmovd %eax,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07007902 196,66,125,88,201, //vpbroadcastd %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05007903 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
7904 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
7905 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
7906 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
7907 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
7908 72,173, //lods %ds:(%rsi),%rax
7909 76,137,193, //mov %r8,%rcx
7910 255,224, //jmpq *%rax
7911 49,201, //xor %ecx,%ecx
7912 77,137,194, //mov %r8,%r10
7913 69,49,201, //xor %r9d,%r9d
7914 68,15,182,24, //movzbl (%rax),%r11d
7915 72,255,192, //inc %rax
7916 73,211,227, //shl %cl,%r11
7917 77,9,217, //or %r11,%r9
7918 72,131,193,8, //add $0x8,%rcx
7919 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05007920 117,234, //jne 55e <_sk_scale_u8_hsw+0x50>
Mike Klein894d5612017-03-07 07:59:52 -05007921 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05007922 235,167, //jmp 522 <_sk_scale_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05007923};
7924
7925CODE const uint8_t sk_lerp_1_float_hsw[] = {
7926 72,173, //lods %ds:(%rsi),%rax
7927 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
7928 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
7929 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7930 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
7931 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
7932 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
7933 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
7934 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
7935 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
7936 72,173, //lods %ds:(%rsi),%rax
7937 255,224, //jmpq *%rax
7938};
7939
7940CODE const uint8_t sk_lerp_u8_hsw[] = {
7941 73,137,200, //mov %rcx,%r8
7942 72,173, //lods %ds:(%rsi),%rax
7943 72,139,0, //mov (%rax),%rax
7944 72,1,248, //add %rdi,%rax
7945 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05007946 117,76, //jne 606 <_sk_lerp_u8_hsw+0x5c>
Mike Klein64b97482017-03-14 17:35:04 -07007947 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05007948 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
7949 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007950 184,129,128,128,59, //mov $0x3b808081,%eax
7951 197,121,110,200, //vmovd %eax,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -07007952 196,66,125,88,201, //vpbroadcastd %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05007953 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
7954 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
7955 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7956 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
7957 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
7958 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
7959 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
7960 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
7961 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
7962 72,173, //lods %ds:(%rsi),%rax
7963 76,137,193, //mov %r8,%rcx
7964 255,224, //jmpq *%rax
7965 49,201, //xor %ecx,%ecx
7966 77,137,194, //mov %r8,%r10
7967 69,49,201, //xor %r9d,%r9d
7968 68,15,182,24, //movzbl (%rax),%r11d
7969 72,255,192, //inc %rax
7970 73,211,227, //shl %cl,%r11
7971 77,9,217, //or %r11,%r9
7972 72,131,193,8, //add $0x8,%rcx
7973 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05007974 117,234, //jne 60e <_sk_lerp_u8_hsw+0x64>
Mike Klein894d5612017-03-07 07:59:52 -05007975 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05007976 235,147, //jmp 5be <_sk_lerp_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05007977};
7978
7979CODE const uint8_t sk_lerp_565_hsw[] = {
7980 72,173, //lods %ds:(%rsi),%rax
7981 76,139,16, //mov (%rax),%r10
7982 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05007983 15,133,179,0,0,0, //jne 6ec <_sk_lerp_565_hsw+0xc1>
Mike Klein894d5612017-03-07 07:59:52 -05007984 196,193,122,111,28,122, //vmovdqu (%r10,%rdi,2),%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05007985 196,98,125,51,195, //vpmovzxwd %xmm3,%ymm8
7986 184,0,248,0,0, //mov $0xf800,%eax
7987 197,249,110,216, //vmovd %eax,%xmm3
7988 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
7989 196,193,101,219,216, //vpand %ymm8,%ymm3,%ymm3
7990 197,124,91,203, //vcvtdq2ps %ymm3,%ymm9
7991 184,8,33,132,55, //mov $0x37842108,%eax
7992 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07007993 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05007994 197,52,89,203, //vmulps %ymm3,%ymm9,%ymm9
7995 184,224,7,0,0, //mov $0x7e0,%eax
7996 197,249,110,216, //vmovd %eax,%xmm3
7997 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
7998 196,193,101,219,216, //vpand %ymm8,%ymm3,%ymm3
7999 197,124,91,211, //vcvtdq2ps %ymm3,%ymm10
8000 184,33,8,2,58, //mov $0x3a020821,%eax
8001 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008002 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008003 197,44,89,211, //vmulps %ymm3,%ymm10,%ymm10
8004 184,31,0,0,0, //mov $0x1f,%eax
8005 197,249,110,216, //vmovd %eax,%xmm3
8006 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
8007 196,193,101,219,216, //vpand %ymm8,%ymm3,%ymm3
8008 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
8009 184,8,33,4,61, //mov $0x3d042108,%eax
8010 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008011 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008012 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05008013 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05008014 196,226,53,168,196, //vfmadd213ps %ymm4,%ymm9,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05008015 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05008016 196,226,45,168,205, //vfmadd213ps %ymm5,%ymm10,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008017 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
8018 196,226,101,168,214, //vfmadd213ps %ymm6,%ymm3,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008019 184,0,0,128,63, //mov $0x3f800000,%eax
8020 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008021 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05008022 72,173, //lods %ds:(%rsi),%rax
8023 255,224, //jmpq *%rax
8024 65,137,200, //mov %ecx,%r8d
8025 65,128,224,7, //and $0x7,%r8b
8026 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
8027 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05008028 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07008029 15,135,59,255,255,255, //ja 63f <_sk_lerp_565_hsw+0x14>
8030 69,15,182,192, //movzbl %r8b,%r8d
Mike Klein5224f462017-03-07 17:29:54 -05008031 76,141,13,73,0,0,0, //lea 0x49(%rip),%r9 # 758 <_sk_lerp_565_hsw+0x12d>
Mike Klein894d5612017-03-07 07:59:52 -05008032 75,99,4,129, //movslq (%r9,%r8,4),%rax
8033 76,1,200, //add %r9,%rax
8034 255,224, //jmpq *%rax
8035 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
8036 196,193,97,196,92,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm3
8037 196,193,97,196,92,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm3,%xmm3
8038 196,193,97,196,92,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm3,%xmm3
8039 196,193,97,196,92,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm3,%xmm3
8040 196,193,97,196,92,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
8041 196,193,97,196,92,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
8042 196,193,97,196,28,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -05008043 233,231,254,255,255, //jmpq 63f <_sk_lerp_565_hsw+0x14>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008044 244, //hlt
Mike Klein894d5612017-03-07 07:59:52 -05008045 255, //(bad)
8046 255, //(bad)
8047 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008048 236, //in (%dx),%al
Mike Klein894d5612017-03-07 07:59:52 -05008049 255, //(bad)
8050 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008051 255,228, //jmpq *%rsp
Mike Klein894d5612017-03-07 07:59:52 -05008052 255, //(bad)
8053 255, //(bad)
8054 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008055 220,255, //fdivr %st,%st(7)
8056 255, //(bad)
8057 255,212, //callq *%rsp
8058 255, //(bad)
8059 255, //(bad)
8060 255,204, //dec %esp
8061 255, //(bad)
8062 255, //(bad)
8063 255,192, //inc %eax
Mike Klein894d5612017-03-07 07:59:52 -05008064 255, //(bad)
8065 255, //(bad)
8066 255, //.byte 0xff
8067};
8068
8069CODE const uint8_t sk_load_tables_hsw[] = {
8070 73,137,200, //mov %rcx,%r8
8071 72,173, //lods %ds:(%rsi),%rax
8072 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
8073 76,3,8, //add (%rax),%r9
8074 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05008075 117,121, //jne 802 <_sk_load_tables_hsw+0x8e>
Mike Klein894d5612017-03-07 07:59:52 -05008076 196,193,126,111,25, //vmovdqu (%r9),%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008077 185,255,0,0,0, //mov $0xff,%ecx
8078 197,249,110,193, //vmovd %ecx,%xmm0
8079 196,226,125,88,208, //vpbroadcastd %xmm0,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008080 197,237,219,203, //vpand %ymm3,%ymm2,%ymm1
8081 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
8082 72,139,72,8, //mov 0x8(%rax),%rcx
8083 76,139,72,16, //mov 0x10(%rax),%r9
8084 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
8085 196,226,53,146,4,137, //vgatherdps %ymm9,(%rcx,%ymm1,4),%ymm0
8086 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
8087 197,109,219,201, //vpand %ymm1,%ymm2,%ymm9
8088 196,65,45,118,210, //vpcmpeqd %ymm10,%ymm10,%ymm10
8089 196,130,45,146,12,137, //vgatherdps %ymm10,(%r9,%ymm9,4),%ymm1
8090 72,139,64,24, //mov 0x18(%rax),%rax
8091 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
8092 196,65,109,219,201, //vpand %ymm9,%ymm2,%ymm9
8093 196,162,61,146,20,136, //vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
8094 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008095 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
8096 184,129,128,128,59, //mov $0x3b808081,%eax
8097 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008098 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008099 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05008100 72,173, //lods %ds:(%rsi),%rax
8101 76,137,193, //mov %r8,%rcx
8102 255,224, //jmpq *%rax
8103 185,8,0,0,0, //mov $0x8,%ecx
8104 68,41,193, //sub %r8d,%ecx
8105 192,225,3, //shl $0x3,%cl
8106 73,199,194,255,255,255,255, //mov $0xffffffffffffffff,%r10
8107 73,211,234, //shr %cl,%r10
8108 196,193,249,110,194, //vmovq %r10,%xmm0
8109 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
8110 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008111 233,99,255,255,255, //jmpq 78e <_sk_load_tables_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05008112};
8113
8114CODE const uint8_t sk_load_a8_hsw[] = {
8115 73,137,200, //mov %rcx,%r8
8116 72,173, //lods %ds:(%rsi),%rax
8117 72,139,0, //mov (%rax),%rax
8118 72,1,248, //add %rdi,%rax
8119 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05008120 117,50, //jne 86d <_sk_load_a8_hsw+0x42>
Mike Klein64b97482017-03-14 17:35:04 -07008121 197,250,126,0, //vmovq (%rax),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05008122 196,226,125,49,192, //vpmovzxbd %xmm0,%ymm0
8123 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05008124 184,129,128,128,59, //mov $0x3b808081,%eax
8125 197,249,110,200, //vmovd %eax,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07008126 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008127 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
8128 72,173, //lods %ds:(%rsi),%rax
8129 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8130 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8131 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8132 76,137,193, //mov %r8,%rcx
8133 255,224, //jmpq *%rax
8134 49,201, //xor %ecx,%ecx
8135 77,137,194, //mov %r8,%r10
8136 69,49,201, //xor %r9d,%r9d
8137 68,15,182,24, //movzbl (%rax),%r11d
8138 72,255,192, //inc %rax
8139 73,211,227, //shl %cl,%r11
8140 77,9,217, //or %r11,%r9
8141 72,131,193,8, //add $0x8,%rcx
8142 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05008143 117,234, //jne 875 <_sk_load_a8_hsw+0x4a>
Mike Klein894d5612017-03-07 07:59:52 -05008144 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05008145 235,173, //jmp 83f <_sk_load_a8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05008146};
8147
8148CODE const uint8_t sk_store_a8_hsw[] = {
8149 72,173, //lods %ds:(%rsi),%rax
8150 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05008151 184,0,0,127,67, //mov $0x437f0000,%eax
8152 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07008153 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008154 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
8155 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
8156 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
8157 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
8158 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
8159 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05008160 117,10, //jne 8cd <_sk_store_a8_hsw+0x3b>
Mike Klein894d5612017-03-07 07:59:52 -05008161 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
8162 72,173, //lods %ds:(%rsi),%rax
8163 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07008164 65,137,200, //mov %ecx,%r8d
8165 65,128,224,7, //and $0x7,%r8b
8166 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05008167 65,128,248,6, //cmp $0x6,%r8b
Mike Klein5224f462017-03-07 17:29:54 -05008168 119,236, //ja 8c9 <_sk_store_a8_hsw+0x37>
Mike Klein894d5612017-03-07 07:59:52 -05008169 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07008170 65,15,182,192, //movzbl %r8b,%eax
8171 76,141,5,67,0,0,0, //lea 0x43(%rip),%r8 # 930 <_sk_store_a8_hsw+0x9e>
8172 73,99,4,128, //movslq (%r8,%rax,4),%rax
8173 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05008174 255,224, //jmpq *%rax
8175 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
8176 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
8177 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
8178 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
8179 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
8180 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
8181 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Klein64b97482017-03-14 17:35:04 -07008182 235,154, //jmp 8c9 <_sk_store_a8_hsw+0x37>
Mike Klein5224f462017-03-07 17:29:54 -05008183 144, //nop
8184 246,255, //idiv %bh
Mike Klein894d5612017-03-07 07:59:52 -05008185 255, //(bad)
8186 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008187 238, //out %al,(%dx)
Mike Klein894d5612017-03-07 07:59:52 -05008188 255, //(bad)
8189 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008190 255,230, //jmpq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05008191 255, //(bad)
8192 255, //(bad)
8193 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008194 222,255, //fdivrp %st,%st(7)
Mike Klein894d5612017-03-07 07:59:52 -05008195 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008196 255,214, //callq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05008197 255, //(bad)
8198 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008199 255,206, //dec %esi
Mike Klein894d5612017-03-07 07:59:52 -05008200 255, //(bad)
8201 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008202 255,198, //inc %esi
Mike Klein894d5612017-03-07 07:59:52 -05008203 255, //(bad)
8204 255, //(bad)
8205 255, //.byte 0xff
8206};
8207
8208CODE const uint8_t sk_load_565_hsw[] = {
8209 72,173, //lods %ds:(%rsi),%rax
8210 76,139,16, //mov (%rax),%r10
8211 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008212 15,133,149,0,0,0, //jne 9ef <_sk_load_565_hsw+0xa3>
Mike Klein894d5612017-03-07 07:59:52 -05008213 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
8214 196,226,125,51,208, //vpmovzxwd %xmm0,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05008215 184,0,248,0,0, //mov $0xf800,%eax
8216 197,249,110,192, //vmovd %eax,%xmm0
8217 196,226,125,88,192, //vpbroadcastd %xmm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05008218 197,253,219,194, //vpand %ymm2,%ymm0,%ymm0
8219 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05008220 184,8,33,132,55, //mov $0x37842108,%eax
8221 197,249,110,200, //vmovd %eax,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07008222 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05008223 197,252,89,193, //vmulps %ymm1,%ymm0,%ymm0
8224 184,224,7,0,0, //mov $0x7e0,%eax
8225 197,249,110,200, //vmovd %eax,%xmm1
8226 196,226,125,88,201, //vpbroadcastd %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008227 197,245,219,202, //vpand %ymm2,%ymm1,%ymm1
8228 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05008229 184,33,8,2,58, //mov $0x3a020821,%eax
8230 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008231 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008232 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
8233 184,31,0,0,0, //mov $0x1f,%eax
8234 197,249,110,216, //vmovd %eax,%xmm3
8235 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05008236 197,229,219,210, //vpand %ymm2,%ymm3,%ymm2
8237 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05008238 184,8,33,4,61, //mov $0x3d042108,%eax
8239 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008240 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05008241 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
8242 184,0,0,128,63, //mov $0x3f800000,%eax
8243 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008244 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05008245 72,173, //lods %ds:(%rsi),%rax
8246 255,224, //jmpq *%rax
8247 65,137,200, //mov %ecx,%r8d
8248 65,128,224,7, //and $0x7,%r8b
8249 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
8250 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05008251 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07008252 15,135,89,255,255,255, //ja 960 <_sk_load_565_hsw+0x14>
8253 69,15,182,192, //movzbl %r8b,%r8d
8254 76,141,13,74,0,0,0, //lea 0x4a(%rip),%r9 # a5c <_sk_load_565_hsw+0x110>
Mike Klein894d5612017-03-07 07:59:52 -05008255 75,99,4,129, //movslq (%r9,%r8,4),%rax
8256 76,1,200, //add %r9,%rax
8257 255,224, //jmpq *%rax
8258 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
8259 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
8260 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
8261 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
8262 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
8263 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
8264 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
8265 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07008266 233,5,255,255,255, //jmpq 960 <_sk_load_565_hsw+0x14>
Mike Klein5224f462017-03-07 17:29:54 -05008267 144, //nop
8268 243,255, //repz (bad)
Mike Klein894d5612017-03-07 07:59:52 -05008269 255, //(bad)
8270 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07008271 235,255, //jmp a61 <_sk_load_565_hsw+0x115>
Mike Klein894d5612017-03-07 07:59:52 -05008272 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008273 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05008274 255, //(bad)
8275 255, //(bad)
8276 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008277 219,255, //(bad)
Mike Klein894d5612017-03-07 07:59:52 -05008278 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008279 255,211, //callq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05008280 255, //(bad)
8281 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008282 255,203, //dec %ebx
Mike Klein894d5612017-03-07 07:59:52 -05008283 255, //(bad)
8284 255, //(bad)
8285 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008286 191, //.byte 0xbf
Mike Klein894d5612017-03-07 07:59:52 -05008287 255, //(bad)
8288 255, //(bad)
8289 255, //.byte 0xff
8290};
8291
8292CODE const uint8_t sk_store_565_hsw[] = {
8293 72,173, //lods %ds:(%rsi),%rax
8294 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05008295 184,0,0,248,65, //mov $0x41f80000,%eax
8296 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07008297 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008298 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
8299 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
8300 196,193,53,114,241,11, //vpslld $0xb,%ymm9,%ymm9
Mike Klein5224f462017-03-07 17:29:54 -05008301 184,0,0,124,66, //mov $0x427c0000,%eax
8302 197,121,110,208, //vmovd %eax,%xmm10
Mike Klein64b97482017-03-14 17:35:04 -07008303 196,66,125,88,210, //vpbroadcastd %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05008304 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
8305 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
8306 196,193,45,114,242,5, //vpslld $0x5,%ymm10,%ymm10
8307 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
8308 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
8309 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
8310 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
8311 196,67,125,57,193,1, //vextracti128 $0x1,%ymm8,%xmm9
8312 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
8313 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008314 117,10, //jne ae4 <_sk_store_565_hsw+0x6c>
Mike Klein894d5612017-03-07 07:59:52 -05008315 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
8316 72,173, //lods %ds:(%rsi),%rax
8317 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07008318 65,137,200, //mov %ecx,%r8d
8319 65,128,224,7, //and $0x7,%r8b
8320 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05008321 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07008322 119,236, //ja ae0 <_sk_store_565_hsw+0x68>
8323 65,15,182,192, //movzbl %r8b,%eax
8324 76,141,5,69,0,0,0, //lea 0x45(%rip),%r8 # b44 <_sk_store_565_hsw+0xcc>
8325 73,99,4,128, //movslq (%r8,%rax,4),%rax
8326 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05008327 255,224, //jmpq *%rax
8328 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
8329 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
8330 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
8331 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
8332 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
8333 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
Mike Klein64b97482017-03-14 17:35:04 -07008334 196,67,121,21,4,121,0, //vpextrw $0x0,%xmm8,(%r9,%rdi,2)
8335 235,159, //jmp ae0 <_sk_store_565_hsw+0x68>
8336 15,31,0, //nopl (%rax)
Mike Klein5224f462017-03-07 17:29:54 -05008337 244, //hlt
Mike Klein894d5612017-03-07 07:59:52 -05008338 255, //(bad)
8339 255, //(bad)
8340 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008341 236, //in (%dx),%al
Mike Klein894d5612017-03-07 07:59:52 -05008342 255, //(bad)
8343 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008344 255,228, //jmpq *%rsp
Mike Klein894d5612017-03-07 07:59:52 -05008345 255, //(bad)
8346 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05008347 255, //(bad)
8348 220,255, //fdivr %st,%st(7)
8349 255, //(bad)
8350 255,212, //callq *%rsp
8351 255, //(bad)
8352 255, //(bad)
8353 255,204, //dec %esp
8354 255, //(bad)
8355 255, //(bad)
8356 255,196, //inc %esp
Mike Klein894d5612017-03-07 07:59:52 -05008357 255, //(bad)
8358 255, //(bad)
8359 255, //.byte 0xff
8360};
8361
8362CODE const uint8_t sk_load_8888_hsw[] = {
8363 73,137,200, //mov %rcx,%r8
8364 72,173, //lods %ds:(%rsi),%rax
8365 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
8366 76,3,8, //add (%rax),%r9
8367 77,133,192, //test %r8,%r8
Mike Klein64b97482017-03-14 17:35:04 -07008368 117,104, //jne bdd <_sk_load_8888_hsw+0x7d>
Mike Klein894d5612017-03-07 07:59:52 -05008369 196,193,126,111,25, //vmovdqu (%r9),%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008370 184,255,0,0,0, //mov $0xff,%eax
8371 197,249,110,192, //vmovd %eax,%xmm0
8372 196,226,125,88,208, //vpbroadcastd %xmm0,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008373 197,237,219,195, //vpand %ymm3,%ymm2,%ymm0
8374 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008375 184,129,128,128,59, //mov $0x3b808081,%eax
8376 197,249,110,200, //vmovd %eax,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -07008377 196,98,125,88,193, //vpbroadcastd %xmm1,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008378 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05008379 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
8380 197,237,219,201, //vpand %ymm1,%ymm2,%ymm1
8381 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008382 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008383 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
8384 196,193,109,219,209, //vpand %ymm9,%ymm2,%ymm2
8385 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008386 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008387 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
8388 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
8389 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
8390 72,173, //lods %ds:(%rsi),%rax
8391 76,137,193, //mov %r8,%rcx
8392 255,224, //jmpq *%rax
8393 185,8,0,0,0, //mov $0x8,%ecx
8394 68,41,193, //sub %r8d,%ecx
8395 192,225,3, //shl $0x3,%cl
8396 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
8397 72,211,232, //shr %cl,%rax
8398 196,225,249,110,192, //vmovq %rax,%xmm0
8399 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
8400 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Klein64b97482017-03-14 17:35:04 -07008401 233,116,255,255,255, //jmpq b7a <_sk_load_8888_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05008402};
8403
8404CODE const uint8_t sk_store_8888_hsw[] = {
8405 73,137,200, //mov %rcx,%r8
8406 72,173, //lods %ds:(%rsi),%rax
8407 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
8408 76,3,8, //add (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008409 184,0,0,127,67, //mov $0x437f0000,%eax
8410 197,121,110,192, //vmovd %eax,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07008411 196,66,125,88,192, //vpbroadcastd %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008412 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
8413 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
8414 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
8415 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
8416 196,193,45,114,242,8, //vpslld $0x8,%ymm10,%ymm10
8417 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
8418 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
8419 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
8420 196,193,45,114,242,16, //vpslld $0x10,%ymm10,%ymm10
8421 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
8422 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
8423 196,193,61,114,240,24, //vpslld $0x18,%ymm8,%ymm8
8424 196,65,45,235,192, //vpor %ymm8,%ymm10,%ymm8
8425 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
8426 77,133,192, //test %r8,%r8
Mike Klein64b97482017-03-14 17:35:04 -07008427 117,12, //jne c7a <_sk_store_8888_hsw+0x74>
Mike Klein894d5612017-03-07 07:59:52 -05008428 196,65,126,127,1, //vmovdqu %ymm8,(%r9)
8429 72,173, //lods %ds:(%rsi),%rax
8430 76,137,193, //mov %r8,%rcx
8431 255,224, //jmpq *%rax
8432 185,8,0,0,0, //mov $0x8,%ecx
8433 68,41,193, //sub %r8d,%ecx
8434 192,225,3, //shl $0x3,%cl
8435 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
8436 72,211,232, //shr %cl,%rax
8437 196,97,249,110,200, //vmovq %rax,%xmm9
8438 196,66,125,33,201, //vpmovsxbd %xmm9,%ymm9
8439 196,66,53,142,1, //vpmaskmovd %ymm8,%ymm9,(%r9)
Mike Klein64b97482017-03-14 17:35:04 -07008440 235,211, //jmp c73 <_sk_store_8888_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05008441};
8442
8443CODE const uint8_t sk_load_f16_hsw[] = {
8444 72,173, //lods %ds:(%rsi),%rax
8445 72,139,0, //mov (%rax),%rax
8446 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008447 117,97, //jne d0b <_sk_load_f16_hsw+0x6b>
8448 197,121,16,4,248, //vmovupd (%rax,%rdi,8),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05008449 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
8450 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008451 197,122,111,76,248,48, //vmovdqu 0x30(%rax,%rdi,8),%xmm9
8452 197,185,97,194, //vpunpcklwd %xmm2,%xmm8,%xmm0
8453 197,185,105,210, //vpunpckhwd %xmm2,%xmm8,%xmm2
8454 196,193,97,97,201, //vpunpcklwd %xmm9,%xmm3,%xmm1
8455 196,193,97,105,217, //vpunpckhwd %xmm9,%xmm3,%xmm3
8456 197,121,97,194, //vpunpcklwd %xmm2,%xmm0,%xmm8
8457 197,121,105,202, //vpunpckhwd %xmm2,%xmm0,%xmm9
8458 197,241,97,211, //vpunpcklwd %xmm3,%xmm1,%xmm2
8459 197,241,105,219, //vpunpckhwd %xmm3,%xmm1,%xmm3
8460 197,185,108,194, //vpunpcklqdq %xmm2,%xmm8,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05008461 196,226,125,19,192, //vcvtph2ps %xmm0,%ymm0
Mike Klein64b97482017-03-14 17:35:04 -07008462 197,185,109,202, //vpunpckhqdq %xmm2,%xmm8,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05008463 196,226,125,19,201, //vcvtph2ps %xmm1,%ymm1
8464 197,177,108,211, //vpunpcklqdq %xmm3,%xmm9,%xmm2
8465 196,226,125,19,210, //vcvtph2ps %xmm2,%ymm2
8466 197,177,109,219, //vpunpckhqdq %xmm3,%xmm9,%xmm3
8467 196,226,125,19,219, //vcvtph2ps %xmm3,%ymm3
8468 72,173, //lods %ds:(%rsi),%rax
8469 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07008470 197,123,16,4,248, //vmovsd (%rax,%rdi,8),%xmm8
8471 196,65,49,239,201, //vpxor %xmm9,%xmm9,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05008472 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008473 116,79, //je d6a <_sk_load_f16_hsw+0xca>
8474 197,57,22,68,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05008475 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008476 114,67, //jb d6a <_sk_load_f16_hsw+0xca>
Mike Klein894d5612017-03-07 07:59:52 -05008477 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
8478 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008479 116,68, //je d77 <_sk_load_f16_hsw+0xd7>
Mike Klein894d5612017-03-07 07:59:52 -05008480 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
8481 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008482 114,56, //jb d77 <_sk_load_f16_hsw+0xd7>
Mike Klein894d5612017-03-07 07:59:52 -05008483 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
8484 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008485 15,132,114,255,255,255, //je cc1 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008486 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
8487 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008488 15,130,98,255,255,255, //jb cc1 <_sk_load_f16_hsw+0x21>
8489 197,122,126,76,248,48, //vmovq 0x30(%rax,%rdi,8),%xmm9
8490 233,87,255,255,255, //jmpq cc1 <_sk_load_f16_hsw+0x21>
8491 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
8492 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
8493 233,74,255,255,255, //jmpq cc1 <_sk_load_f16_hsw+0x21>
8494 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
8495 233,65,255,255,255, //jmpq cc1 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008496};
8497
8498CODE const uint8_t sk_store_f16_hsw[] = {
8499 72,173, //lods %ds:(%rsi),%rax
8500 72,139,0, //mov (%rax),%rax
8501 196,195,125,29,192,4, //vcvtps2ph $0x4,%ymm0,%xmm8
8502 196,195,125,29,201,4, //vcvtps2ph $0x4,%ymm1,%xmm9
8503 196,195,125,29,210,4, //vcvtps2ph $0x4,%ymm2,%xmm10
8504 196,195,125,29,219,4, //vcvtps2ph $0x4,%ymm3,%xmm11
8505 196,65,57,97,225, //vpunpcklwd %xmm9,%xmm8,%xmm12
8506 196,65,57,105,193, //vpunpckhwd %xmm9,%xmm8,%xmm8
8507 196,65,41,97,203, //vpunpcklwd %xmm11,%xmm10,%xmm9
8508 196,65,41,105,235, //vpunpckhwd %xmm11,%xmm10,%xmm13
8509 196,65,25,98,217, //vpunpckldq %xmm9,%xmm12,%xmm11
8510 196,65,25,106,209, //vpunpckhdq %xmm9,%xmm12,%xmm10
8511 196,65,57,98,205, //vpunpckldq %xmm13,%xmm8,%xmm9
8512 196,65,57,106,197, //vpunpckhdq %xmm13,%xmm8,%xmm8
8513 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008514 117,27, //jne de5 <_sk_store_f16_hsw+0x65>
Mike Klein894d5612017-03-07 07:59:52 -05008515 197,120,17,28,248, //vmovups %xmm11,(%rax,%rdi,8)
8516 197,120,17,84,248,16, //vmovups %xmm10,0x10(%rax,%rdi,8)
8517 197,120,17,76,248,32, //vmovups %xmm9,0x20(%rax,%rdi,8)
8518 197,122,127,68,248,48, //vmovdqu %xmm8,0x30(%rax,%rdi,8)
8519 72,173, //lods %ds:(%rsi),%rax
8520 255,224, //jmpq *%rax
8521 197,121,214,28,248, //vmovq %xmm11,(%rax,%rdi,8)
8522 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008523 116,241, //je de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008524 197,121,23,92,248,8, //vmovhpd %xmm11,0x8(%rax,%rdi,8)
8525 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008526 114,229, //jb de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008527 197,121,214,84,248,16, //vmovq %xmm10,0x10(%rax,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07008528 116,221, //je de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008529 197,121,23,84,248,24, //vmovhpd %xmm10,0x18(%rax,%rdi,8)
8530 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008531 114,209, //jb de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008532 197,121,214,76,248,32, //vmovq %xmm9,0x20(%rax,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07008533 116,201, //je de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008534 197,121,23,76,248,40, //vmovhpd %xmm9,0x28(%rax,%rdi,8)
8535 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008536 114,189, //jb de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008537 197,121,214,68,248,48, //vmovq %xmm8,0x30(%rax,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -07008538 235,181, //jmp de1 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008539};
8540
8541CODE const uint8_t sk_store_f32_hsw[] = {
8542 72,173, //lods %ds:(%rsi),%rax
8543 76,139,0, //mov (%rax),%r8
8544 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
8545 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
8546 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
8547 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
8548 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
8549 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
8550 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
8551 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
8552 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
8553 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008554 117,55, //jne e99 <_sk_store_f32_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05008555 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
8556 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
8557 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
8558 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
8559 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
8560 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
8561 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
8562 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
8563 72,173, //lods %ds:(%rsi),%rax
8564 255,224, //jmpq *%rax
8565 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
8566 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008567 116,240, //je e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008568 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
8569 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008570 114,227, //jb e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008571 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07008572 116,218, //je e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008573 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
8574 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008575 114,205, //jb e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008576 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07008577 116,195, //je e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008578 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
8579 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07008580 114,181, //jb e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008581 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -07008582 235,171, //jmp e95 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008583};
8584
8585CODE const uint8_t sk_clamp_x_hsw[] = {
8586 72,173, //lods %ds:(%rsi),%rax
8587 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8588 197,188,95,192, //vmaxps %ymm0,%ymm8,%ymm0
8589 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
8590 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
8591 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
8592 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
8593 72,173, //lods %ds:(%rsi),%rax
8594 255,224, //jmpq *%rax
8595};
8596
8597CODE const uint8_t sk_clamp_y_hsw[] = {
8598 72,173, //lods %ds:(%rsi),%rax
8599 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8600 197,188,95,201, //vmaxps %ymm1,%ymm8,%ymm1
8601 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
8602 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
8603 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
8604 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
8605 72,173, //lods %ds:(%rsi),%rax
8606 255,224, //jmpq *%rax
8607};
8608
8609CODE const uint8_t sk_repeat_x_hsw[] = {
8610 72,173, //lods %ds:(%rsi),%rax
8611 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8612 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
8613 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
8614 196,98,61,172,200, //vfnmadd213ps %ymm0,%ymm8,%ymm9
8615 197,253,118,192, //vpcmpeqd %ymm0,%ymm0,%ymm0
8616 197,189,254,192, //vpaddd %ymm0,%ymm8,%ymm0
8617 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
8618 72,173, //lods %ds:(%rsi),%rax
8619 255,224, //jmpq *%rax
8620};
8621
8622CODE const uint8_t sk_repeat_y_hsw[] = {
8623 72,173, //lods %ds:(%rsi),%rax
8624 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8625 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
8626 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
8627 196,98,61,172,201, //vfnmadd213ps %ymm1,%ymm8,%ymm9
8628 197,245,118,201, //vpcmpeqd %ymm1,%ymm1,%ymm1
8629 197,189,254,201, //vpaddd %ymm1,%ymm8,%ymm1
8630 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
8631 72,173, //lods %ds:(%rsi),%rax
8632 255,224, //jmpq *%rax
8633};
8634
8635CODE const uint8_t sk_mirror_x_hsw[] = {
8636 72,173, //lods %ds:(%rsi),%rax
8637 197,122,16,0, //vmovss (%rax),%xmm8
8638 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
8639 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
8640 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
8641 196,226,125,24,192, //vbroadcastss %xmm0,%ymm0
8642 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
8643 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
8644 196,66,125,172,194, //vfnmadd213ps %ymm10,%ymm0,%ymm8
8645 196,193,60,92,193, //vsubps %ymm9,%ymm8,%ymm0
8646 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8647 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
8648 197,188,84,192, //vandps %ymm0,%ymm8,%ymm0
8649 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
8650 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
8651 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
8652 72,173, //lods %ds:(%rsi),%rax
8653 255,224, //jmpq *%rax
8654};
8655
8656CODE const uint8_t sk_mirror_y_hsw[] = {
8657 72,173, //lods %ds:(%rsi),%rax
8658 197,122,16,0, //vmovss (%rax),%xmm8
8659 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
8660 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
8661 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
8662 196,226,125,24,201, //vbroadcastss %xmm1,%ymm1
8663 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
8664 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
8665 196,66,117,172,194, //vfnmadd213ps %ymm10,%ymm1,%ymm8
8666 196,193,60,92,201, //vsubps %ymm9,%ymm8,%ymm1
8667 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8668 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
8669 197,188,84,201, //vandps %ymm1,%ymm8,%ymm1
8670 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
8671 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
8672 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
8673 72,173, //lods %ds:(%rsi),%rax
8674 255,224, //jmpq *%rax
8675};
8676
Mike Kleine9ed07d2017-03-07 12:28:11 -05008677CODE const uint8_t sk_luminance_to_alpha_hsw[] = {
Mike Klein5224f462017-03-07 17:29:54 -05008678 184,208,179,89,62, //mov $0x3e59b3d0,%eax
8679 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008680 196,98,125,88,195, //vpbroadcastd %xmm3,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05008681 184,89,23,55,63, //mov $0x3f371759,%eax
8682 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -07008683 196,226,125,88,219, //vpbroadcastd %xmm3,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05008684 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
8685 196,98,125,168,193, //vfmadd213ps %ymm1,%ymm0,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05008686 184,152,221,147,61, //mov $0x3d93dd98,%eax
8687 197,249,110,192, //vmovd %eax,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07008688 196,226,125,88,216, //vpbroadcastd %xmm0,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -05008689 196,194,109,168,216, //vfmadd213ps %ymm8,%ymm2,%ymm3
8690 72,173, //lods %ds:(%rsi),%rax
Mike Klein64b97482017-03-14 17:35:04 -07008691 197,253,239,192, //vpxor %ymm0,%ymm0,%ymm0
Mike Kleine9ed07d2017-03-07 12:28:11 -05008692 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8693 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8694 255,224, //jmpq *%rax
8695};
8696
Mike Klein894d5612017-03-07 07:59:52 -05008697CODE const uint8_t sk_matrix_2x3_hsw[] = {
8698 72,173, //lods %ds:(%rsi),%rax
8699 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
8700 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
8701 196,98,125,24,64,16, //vbroadcastss 0x10(%rax),%ymm8
8702 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
8703 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
8704 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
8705 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
8706 196,98,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm9
8707 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
8708 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
8709 72,173, //lods %ds:(%rsi),%rax
8710 197,124,41,192, //vmovaps %ymm8,%ymm0
8711 197,124,41,201, //vmovaps %ymm9,%ymm1
8712 255,224, //jmpq *%rax
8713};
8714
8715CODE const uint8_t sk_matrix_3x4_hsw[] = {
8716 72,173, //lods %ds:(%rsi),%rax
8717 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
8718 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
8719 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
8720 196,98,125,24,64,36, //vbroadcastss 0x24(%rax),%ymm8
8721 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
8722 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
8723 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
8724 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
8725 196,98,125,24,88,16, //vbroadcastss 0x10(%rax),%ymm11
8726 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
8727 196,98,125,24,72,40, //vbroadcastss 0x28(%rax),%ymm9
8728 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
8729 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
8730 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
8731 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
8732 196,98,125,24,96,20, //vbroadcastss 0x14(%rax),%ymm12
8733 196,98,125,24,104,32, //vbroadcastss 0x20(%rax),%ymm13
8734 196,98,125,24,80,44, //vbroadcastss 0x2c(%rax),%ymm10
8735 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
8736 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
8737 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
8738 72,173, //lods %ds:(%rsi),%rax
8739 197,124,41,192, //vmovaps %ymm8,%ymm0
8740 197,124,41,201, //vmovaps %ymm9,%ymm1
8741 197,124,41,210, //vmovaps %ymm10,%ymm2
8742 255,224, //jmpq *%rax
8743};
8744
Mike Kleine9ed07d2017-03-07 12:28:11 -05008745CODE const uint8_t sk_matrix_4x5_hsw[] = {
8746 72,173, //lods %ds:(%rsi),%rax
8747 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
8748 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
8749 196,98,125,24,88,32, //vbroadcastss 0x20(%rax),%ymm11
8750 196,98,125,24,96,48, //vbroadcastss 0x30(%rax),%ymm12
8751 196,98,125,24,64,64, //vbroadcastss 0x40(%rax),%ymm8
8752 196,66,101,184,196, //vfmadd231ps %ymm12,%ymm3,%ymm8
8753 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
8754 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
8755 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
8756 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
8757 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
8758 196,98,125,24,96,36, //vbroadcastss 0x24(%rax),%ymm12
8759 196,98,125,24,104,52, //vbroadcastss 0x34(%rax),%ymm13
8760 196,98,125,24,72,68, //vbroadcastss 0x44(%rax),%ymm9
8761 196,66,101,184,205, //vfmadd231ps %ymm13,%ymm3,%ymm9
8762 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
8763 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
8764 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
8765 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
8766 196,98,125,24,96,24, //vbroadcastss 0x18(%rax),%ymm12
8767 196,98,125,24,104,40, //vbroadcastss 0x28(%rax),%ymm13
8768 196,98,125,24,112,56, //vbroadcastss 0x38(%rax),%ymm14
8769 196,98,125,24,80,72, //vbroadcastss 0x48(%rax),%ymm10
8770 196,66,101,184,214, //vfmadd231ps %ymm14,%ymm3,%ymm10
8771 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
8772 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
8773 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
8774 196,98,125,24,96,12, //vbroadcastss 0xc(%rax),%ymm12
8775 196,98,125,24,104,28, //vbroadcastss 0x1c(%rax),%ymm13
8776 196,98,125,24,112,44, //vbroadcastss 0x2c(%rax),%ymm14
8777 196,98,125,24,120,60, //vbroadcastss 0x3c(%rax),%ymm15
8778 196,98,125,24,88,76, //vbroadcastss 0x4c(%rax),%ymm11
8779 196,66,101,184,223, //vfmadd231ps %ymm15,%ymm3,%ymm11
8780 196,66,109,184,222, //vfmadd231ps %ymm14,%ymm2,%ymm11
8781 196,66,117,184,221, //vfmadd231ps %ymm13,%ymm1,%ymm11
8782 196,66,125,184,220, //vfmadd231ps %ymm12,%ymm0,%ymm11
8783 72,173, //lods %ds:(%rsi),%rax
8784 197,124,41,192, //vmovaps %ymm8,%ymm0
8785 197,124,41,201, //vmovaps %ymm9,%ymm1
8786 197,124,41,210, //vmovaps %ymm10,%ymm2
8787 197,124,41,219, //vmovaps %ymm11,%ymm3
8788 255,224, //jmpq *%rax
8789};
8790
Mike Klein894d5612017-03-07 07:59:52 -05008791CODE const uint8_t sk_matrix_perspective_hsw[] = {
8792 72,173, //lods %ds:(%rsi),%rax
8793 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8794 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
8795 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
8796 196,66,117,184,209, //vfmadd231ps %ymm9,%ymm1,%ymm10
8797 196,66,125,184,208, //vfmadd231ps %ymm8,%ymm0,%ymm10
8798 196,98,125,24,64,12, //vbroadcastss 0xc(%rax),%ymm8
8799 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
8800 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
8801 196,66,117,184,217, //vfmadd231ps %ymm9,%ymm1,%ymm11
8802 196,66,125,184,216, //vfmadd231ps %ymm8,%ymm0,%ymm11
8803 196,98,125,24,64,24, //vbroadcastss 0x18(%rax),%ymm8
8804 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
8805 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
8806 196,66,117,184,225, //vfmadd231ps %ymm9,%ymm1,%ymm12
8807 196,66,125,184,224, //vfmadd231ps %ymm8,%ymm0,%ymm12
8808 196,193,124,83,204, //vrcpps %ymm12,%ymm1
8809 197,172,89,193, //vmulps %ymm1,%ymm10,%ymm0
8810 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
8811 72,173, //lods %ds:(%rsi),%rax
8812 255,224, //jmpq *%rax
8813};
8814
8815CODE const uint8_t sk_linear_gradient_2stops_hsw[] = {
8816 72,173, //lods %ds:(%rsi),%rax
8817 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
8818 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8819 196,98,125,184,193, //vfmadd231ps %ymm1,%ymm0,%ymm8
8820 196,226,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm2
8821 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
8822 196,226,125,184,202, //vfmadd231ps %ymm2,%ymm0,%ymm1
8823 196,226,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm3
8824 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
8825 196,226,125,184,211, //vfmadd231ps %ymm3,%ymm0,%ymm2
8826 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
8827 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
8828 196,194,125,184,217, //vfmadd231ps %ymm9,%ymm0,%ymm3
8829 72,173, //lods %ds:(%rsi),%rax
8830 197,124,41,192, //vmovaps %ymm8,%ymm0
8831 255,224, //jmpq *%rax
8832};
8833
8834CODE const uint8_t sk_start_pipeline_avx[] = {
8835 65,87, //push %r15
8836 65,86, //push %r14
8837 65,85, //push %r13
8838 65,84, //push %r12
8839 86, //push %rsi
8840 87, //push %rdi
8841 83, //push %rbx
8842 72,129,236,160,0,0,0, //sub $0xa0,%rsp
8843 197,120,41,188,36,144,0,0,0, //vmovaps %xmm15,0x90(%rsp)
8844 197,120,41,180,36,128,0,0,0, //vmovaps %xmm14,0x80(%rsp)
8845 197,120,41,108,36,112, //vmovaps %xmm13,0x70(%rsp)
8846 197,120,41,100,36,96, //vmovaps %xmm12,0x60(%rsp)
8847 197,120,41,92,36,80, //vmovaps %xmm11,0x50(%rsp)
8848 197,120,41,84,36,64, //vmovaps %xmm10,0x40(%rsp)
8849 197,120,41,76,36,48, //vmovaps %xmm9,0x30(%rsp)
8850 197,120,41,68,36,32, //vmovaps %xmm8,0x20(%rsp)
8851 197,248,41,124,36,16, //vmovaps %xmm7,0x10(%rsp)
8852 197,248,41,52,36, //vmovaps %xmm6,(%rsp)
8853 77,137,205, //mov %r9,%r13
8854 77,137,198, //mov %r8,%r14
8855 72,137,203, //mov %rcx,%rbx
8856 72,137,214, //mov %rdx,%rsi
8857 72,173, //lods %ds:(%rsi),%rax
8858 73,137,199, //mov %rax,%r15
8859 73,137,244, //mov %rsi,%r12
8860 72,141,67,8, //lea 0x8(%rbx),%rax
8861 76,57,232, //cmp %r13,%rax
8862 118,5, //jbe 75 <_sk_start_pipeline_avx+0x75>
8863 72,137,223, //mov %rbx,%rdi
8864 235,65, //jmp b6 <_sk_start_pipeline_avx+0xb6>
8865 185,0,0,0,0, //mov $0x0,%ecx
8866 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8867 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8868 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8869 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8870 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
8871 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
8872 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
8873 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
8874 72,137,223, //mov %rbx,%rdi
8875 76,137,230, //mov %r12,%rsi
8876 76,137,242, //mov %r14,%rdx
8877 65,255,215, //callq *%r15
8878 72,141,123,8, //lea 0x8(%rbx),%rdi
8879 72,131,195,16, //add $0x10,%rbx
8880 76,57,235, //cmp %r13,%rbx
8881 72,137,251, //mov %rdi,%rbx
8882 118,191, //jbe 75 <_sk_start_pipeline_avx+0x75>
8883 76,137,233, //mov %r13,%rcx
8884 72,41,249, //sub %rdi,%rcx
8885 116,41, //je e7 <_sk_start_pipeline_avx+0xe7>
8886 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8887 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8888 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8889 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8890 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
8891 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
8892 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
8893 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
8894 76,137,230, //mov %r12,%rsi
8895 76,137,242, //mov %r14,%rdx
8896 65,255,215, //callq *%r15
8897 76,137,232, //mov %r13,%rax
8898 197,248,40,52,36, //vmovaps (%rsp),%xmm6
8899 197,248,40,124,36,16, //vmovaps 0x10(%rsp),%xmm7
8900 197,120,40,68,36,32, //vmovaps 0x20(%rsp),%xmm8
8901 197,120,40,76,36,48, //vmovaps 0x30(%rsp),%xmm9
8902 197,120,40,84,36,64, //vmovaps 0x40(%rsp),%xmm10
8903 197,120,40,92,36,80, //vmovaps 0x50(%rsp),%xmm11
8904 197,120,40,100,36,96, //vmovaps 0x60(%rsp),%xmm12
8905 197,120,40,108,36,112, //vmovaps 0x70(%rsp),%xmm13
8906 197,120,40,180,36,128,0,0,0, //vmovaps 0x80(%rsp),%xmm14
8907 197,120,40,188,36,144,0,0,0, //vmovaps 0x90(%rsp),%xmm15
8908 72,129,196,160,0,0,0, //add $0xa0,%rsp
8909 91, //pop %rbx
8910 95, //pop %rdi
8911 94, //pop %rsi
8912 65,92, //pop %r12
8913 65,93, //pop %r13
8914 65,94, //pop %r14
8915 65,95, //pop %r15
8916 197,248,119, //vzeroupper
8917 195, //retq
8918};
8919
8920CODE const uint8_t sk_just_return_avx[] = {
8921 195, //retq
8922};
8923
8924CODE const uint8_t sk_seed_shader_avx[] = {
8925 72,173, //lods %ds:(%rsi),%rax
8926 197,249,110,199, //vmovd %edi,%xmm0
8927 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
8928 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
8929 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008930 65,184,0,0,0,63, //mov $0x3f000000,%r8d
8931 196,193,121,110,200, //vmovd %r8d,%xmm1
8932 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
8933 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008934 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05008935 197,252,88,2, //vaddps (%rdx),%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05008936 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
8937 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
8938 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008939 184,0,0,128,63, //mov $0x3f800000,%eax
8940 197,249,110,208, //vmovd %eax,%xmm2
8941 196,227,121,4,210,0, //vpermilps $0x0,%xmm2,%xmm2
8942 196,227,109,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008943 72,173, //lods %ds:(%rsi),%rax
8944 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8945 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
8946 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
8947 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
8948 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
8949 255,224, //jmpq *%rax
8950};
8951
8952CODE const uint8_t sk_constant_color_avx[] = {
8953 72,173, //lods %ds:(%rsi),%rax
8954 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
8955 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
8956 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
8957 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
8958 72,173, //lods %ds:(%rsi),%rax
8959 255,224, //jmpq *%rax
8960};
8961
8962CODE const uint8_t sk_clear_avx[] = {
8963 72,173, //lods %ds:(%rsi),%rax
8964 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8965 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8966 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8967 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8968 255,224, //jmpq *%rax
8969};
8970
8971CODE const uint8_t sk_plus__avx[] = {
8972 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
8973 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
8974 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
8975 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
8976 72,173, //lods %ds:(%rsi),%rax
8977 255,224, //jmpq *%rax
8978};
8979
8980CODE const uint8_t sk_srcover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008981 184,0,0,128,63, //mov $0x3f800000,%eax
8982 197,121,110,192, //vmovd %eax,%xmm8
8983 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
8984 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008985 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
8986 197,60,89,204, //vmulps %ymm4,%ymm8,%ymm9
8987 197,180,88,192, //vaddps %ymm0,%ymm9,%ymm0
8988 197,60,89,205, //vmulps %ymm5,%ymm8,%ymm9
8989 197,180,88,201, //vaddps %ymm1,%ymm9,%ymm1
8990 197,60,89,206, //vmulps %ymm6,%ymm8,%ymm9
8991 197,180,88,210, //vaddps %ymm2,%ymm9,%ymm2
8992 197,60,89,199, //vmulps %ymm7,%ymm8,%ymm8
8993 197,188,88,219, //vaddps %ymm3,%ymm8,%ymm3
8994 72,173, //lods %ds:(%rsi),%rax
8995 255,224, //jmpq *%rax
8996};
8997
8998CODE const uint8_t sk_dstover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008999 184,0,0,128,63, //mov $0x3f800000,%eax
9000 197,121,110,192, //vmovd %eax,%xmm8
9001 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9002 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009003 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
9004 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
9005 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
9006 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
9007 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
9008 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
9009 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
9010 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
9011 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
9012 72,173, //lods %ds:(%rsi),%rax
9013 255,224, //jmpq *%rax
9014};
9015
9016CODE const uint8_t sk_clamp_0_avx[] = {
9017 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9018 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
9019 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
9020 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
9021 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
9022 72,173, //lods %ds:(%rsi),%rax
9023 255,224, //jmpq *%rax
9024};
9025
9026CODE const uint8_t sk_clamp_1_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009027 184,0,0,128,63, //mov $0x3f800000,%eax
9028 197,121,110,192, //vmovd %eax,%xmm8
9029 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9030 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009031 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
9032 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
9033 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
9034 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
9035 72,173, //lods %ds:(%rsi),%rax
9036 255,224, //jmpq *%rax
9037};
9038
9039CODE const uint8_t sk_clamp_a_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009040 184,0,0,128,63, //mov $0x3f800000,%eax
9041 197,121,110,192, //vmovd %eax,%xmm8
9042 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9043 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009044 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
9045 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
9046 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
9047 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
9048 72,173, //lods %ds:(%rsi),%rax
9049 255,224, //jmpq *%rax
9050};
9051
9052CODE const uint8_t sk_set_rgb_avx[] = {
9053 72,173, //lods %ds:(%rsi),%rax
9054 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
9055 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
9056 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
9057 72,173, //lods %ds:(%rsi),%rax
9058 255,224, //jmpq *%rax
9059};
9060
9061CODE const uint8_t sk_swap_rb_avx[] = {
9062 197,124,40,192, //vmovaps %ymm0,%ymm8
9063 72,173, //lods %ds:(%rsi),%rax
9064 197,252,40,194, //vmovaps %ymm2,%ymm0
9065 197,124,41,194, //vmovaps %ymm8,%ymm2
9066 255,224, //jmpq *%rax
9067};
9068
9069CODE const uint8_t sk_swap_avx[] = {
9070 197,124,40,195, //vmovaps %ymm3,%ymm8
9071 197,124,40,202, //vmovaps %ymm2,%ymm9
9072 197,124,40,209, //vmovaps %ymm1,%ymm10
9073 197,124,40,216, //vmovaps %ymm0,%ymm11
9074 72,173, //lods %ds:(%rsi),%rax
9075 197,252,40,196, //vmovaps %ymm4,%ymm0
9076 197,252,40,205, //vmovaps %ymm5,%ymm1
9077 197,252,40,214, //vmovaps %ymm6,%ymm2
9078 197,252,40,223, //vmovaps %ymm7,%ymm3
9079 197,124,41,220, //vmovaps %ymm11,%ymm4
9080 197,124,41,213, //vmovaps %ymm10,%ymm5
9081 197,124,41,206, //vmovaps %ymm9,%ymm6
9082 197,124,41,199, //vmovaps %ymm8,%ymm7
9083 255,224, //jmpq *%rax
9084};
9085
9086CODE const uint8_t sk_move_src_dst_avx[] = {
9087 72,173, //lods %ds:(%rsi),%rax
9088 197,252,40,224, //vmovaps %ymm0,%ymm4
9089 197,252,40,233, //vmovaps %ymm1,%ymm5
9090 197,252,40,242, //vmovaps %ymm2,%ymm6
9091 197,252,40,251, //vmovaps %ymm3,%ymm7
9092 255,224, //jmpq *%rax
9093};
9094
9095CODE const uint8_t sk_move_dst_src_avx[] = {
9096 72,173, //lods %ds:(%rsi),%rax
9097 197,252,40,196, //vmovaps %ymm4,%ymm0
9098 197,252,40,205, //vmovaps %ymm5,%ymm1
9099 197,252,40,214, //vmovaps %ymm6,%ymm2
9100 197,252,40,223, //vmovaps %ymm7,%ymm3
9101 255,224, //jmpq *%rax
9102};
9103
9104CODE const uint8_t sk_premul_avx[] = {
9105 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
9106 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
9107 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
9108 72,173, //lods %ds:(%rsi),%rax
9109 255,224, //jmpq *%rax
9110};
9111
9112CODE const uint8_t sk_unpremul_avx[] = {
9113 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9114 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009115 184,0,0,128,63, //mov $0x3f800000,%eax
9116 197,121,110,208, //vmovd %eax,%xmm10
9117 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
9118 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05009119 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
9120 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
9121 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
9122 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
9123 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
9124 72,173, //lods %ds:(%rsi),%rax
9125 255,224, //jmpq *%rax
9126};
9127
9128CODE const uint8_t sk_from_srgb_avx[] = {
Mike Klein5224f462017-03-07 17:29:54 -05009129 184,145,131,158,61, //mov $0x3d9e8391,%eax
9130 197,121,110,192, //vmovd %eax,%xmm8
9131 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9132 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009133 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
9134 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05009135 184,154,153,153,62, //mov $0x3e99999a,%eax
9136 197,121,110,216, //vmovd %eax,%xmm11
9137 196,67,121,4,219,0, //vpermilps $0x0,%xmm11,%xmm11
9138 196,67,37,24,219,1, //vinsertf128 $0x1,%xmm11,%ymm11,%ymm11
9139 184,92,143,50,63, //mov $0x3f328f5c,%eax
9140 197,121,110,224, //vmovd %eax,%xmm12
9141 196,67,121,4,228,0, //vpermilps $0x0,%xmm12,%xmm12
9142 196,67,29,24,228,1, //vinsertf128 $0x1,%xmm12,%ymm12,%ymm12
Mike Klein894d5612017-03-07 07:59:52 -05009143 197,36,89,232, //vmulps %ymm0,%ymm11,%ymm13
9144 196,65,20,88,236, //vaddps %ymm12,%ymm13,%ymm13
Mike Klein5224f462017-03-07 17:29:54 -05009145 184,10,215,35,59, //mov $0x3b23d70a,%eax
9146 197,121,110,240, //vmovd %eax,%xmm14
9147 196,67,121,4,246,0, //vpermilps $0x0,%xmm14,%xmm14
9148 196,67,13,24,246,1, //vinsertf128 $0x1,%xmm14,%ymm14,%ymm14
Mike Klein894d5612017-03-07 07:59:52 -05009149 196,65,44,89,213, //vmulps %ymm13,%ymm10,%ymm10
9150 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05009151 184,174,71,97,61, //mov $0x3d6147ae,%eax
9152 197,121,110,232, //vmovd %eax,%xmm13
9153 196,67,121,4,237,0, //vpermilps $0x0,%xmm13,%xmm13
9154 196,67,21,24,237,1, //vinsertf128 $0x1,%xmm13,%ymm13,%ymm13
Mike Klein894d5612017-03-07 07:59:52 -05009155 196,193,124,194,197,1, //vcmpltps %ymm13,%ymm0,%ymm0
9156 196,195,45,74,193,0, //vblendvps %ymm0,%ymm9,%ymm10,%ymm0
9157 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
9158 197,116,89,209, //vmulps %ymm1,%ymm1,%ymm10
9159 197,36,89,249, //vmulps %ymm1,%ymm11,%ymm15
Mike Klein5224f462017-03-07 17:29:54 -05009160 196,65,28,88,255, //vaddps %ymm15,%ymm12,%ymm15
Mike Klein894d5612017-03-07 07:59:52 -05009161 196,65,44,89,215, //vmulps %ymm15,%ymm10,%ymm10
9162 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
9163 196,193,116,194,205,1, //vcmpltps %ymm13,%ymm1,%ymm1
9164 196,195,45,74,201,16, //vblendvps %ymm1,%ymm9,%ymm10,%ymm1
9165 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
9166 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
9167 197,36,89,210, //vmulps %ymm2,%ymm11,%ymm10
Mike Klein5224f462017-03-07 17:29:54 -05009168 196,65,28,88,210, //vaddps %ymm10,%ymm12,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05009169 196,65,52,89,202, //vmulps %ymm10,%ymm9,%ymm9
9170 196,65,12,88,201, //vaddps %ymm9,%ymm14,%ymm9
9171 196,193,108,194,213,1, //vcmpltps %ymm13,%ymm2,%ymm2
9172 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
9173 72,173, //lods %ds:(%rsi),%rax
9174 255,224, //jmpq *%rax
9175};
9176
9177CODE const uint8_t sk_to_srgb_avx[] = {
9178 197,124,82,192, //vrsqrtps %ymm0,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05009179 196,65,124,83,232, //vrcpps %ymm8,%ymm13
9180 196,65,124,82,240, //vrsqrtps %ymm8,%ymm14
9181 184,41,92,71,65, //mov $0x41475c29,%eax
9182 197,121,110,192, //vmovd %eax,%xmm8
9183 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9184 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
9185 197,60,89,224, //vmulps %ymm0,%ymm8,%ymm12
9186 184,0,0,128,63, //mov $0x3f800000,%eax
9187 197,121,110,200, //vmovd %eax,%xmm9
9188 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
9189 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
9190 184,194,135,210,62, //mov $0x3ed287c2,%eax
9191 197,121,110,208, //vmovd %eax,%xmm10
9192 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
9193 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
9194 184,206,111,48,63, //mov $0x3f306fce,%eax
9195 197,121,110,216, //vmovd %eax,%xmm11
9196 196,67,121,4,219,0, //vpermilps $0x0,%xmm11,%xmm11
9197 196,67,37,24,219,1, //vinsertf128 $0x1,%xmm11,%ymm11,%ymm11
9198 184,168,87,202,61, //mov $0x3dca57a8,%eax
9199 53,0,0,0,128, //xor $0x80000000,%eax
9200 197,121,110,248, //vmovd %eax,%xmm15
9201 196,67,121,4,255,0, //vpermilps $0x0,%xmm15,%xmm15
9202 196,67,5,24,255,1, //vinsertf128 $0x1,%xmm15,%ymm15,%ymm15
9203 196,65,20,89,235, //vmulps %ymm11,%ymm13,%ymm13
9204 196,65,20,88,239, //vaddps %ymm15,%ymm13,%ymm13
9205 196,65,12,89,242, //vmulps %ymm10,%ymm14,%ymm14
9206 196,65,12,88,237, //vaddps %ymm13,%ymm14,%ymm13
9207 196,65,52,93,237, //vminps %ymm13,%ymm9,%ymm13
9208 184,4,231,140,59, //mov $0x3b8ce704,%eax
9209 197,121,110,240, //vmovd %eax,%xmm14
9210 196,67,121,4,246,0, //vpermilps $0x0,%xmm14,%xmm14
9211 196,67,13,24,246,1, //vinsertf128 $0x1,%xmm14,%ymm14,%ymm14
9212 196,193,124,194,198,1, //vcmpltps %ymm14,%ymm0,%ymm0
9213 196,195,21,74,196,0, //vblendvps %ymm0,%ymm12,%ymm13,%ymm0
9214 197,124,82,225, //vrsqrtps %ymm1,%ymm12
9215 196,65,124,83,236, //vrcpps %ymm12,%ymm13
9216 196,65,124,82,228, //vrsqrtps %ymm12,%ymm12
9217 196,65,36,89,237, //vmulps %ymm13,%ymm11,%ymm13
9218 196,65,4,88,237, //vaddps %ymm13,%ymm15,%ymm13
9219 196,65,44,89,228, //vmulps %ymm12,%ymm10,%ymm12
9220 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
9221 197,60,89,233, //vmulps %ymm1,%ymm8,%ymm13
9222 196,65,52,93,228, //vminps %ymm12,%ymm9,%ymm12
9223 196,193,116,194,206,1, //vcmpltps %ymm14,%ymm1,%ymm1
9224 196,195,29,74,205,16, //vblendvps %ymm1,%ymm13,%ymm12,%ymm1
9225 197,124,82,226, //vrsqrtps %ymm2,%ymm12
9226 196,65,124,83,236, //vrcpps %ymm12,%ymm13
9227 196,65,36,89,221, //vmulps %ymm13,%ymm11,%ymm11
Mike Klein894d5612017-03-07 07:59:52 -05009228 196,65,4,88,219, //vaddps %ymm11,%ymm15,%ymm11
Mike Klein5224f462017-03-07 17:29:54 -05009229 196,65,124,82,228, //vrsqrtps %ymm12,%ymm12
9230 196,65,44,89,212, //vmulps %ymm12,%ymm10,%ymm10
9231 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
9232 196,65,52,93,202, //vminps %ymm10,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05009233 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05009234 196,193,108,194,214,1, //vcmpltps %ymm14,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05009235 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
9236 72,173, //lods %ds:(%rsi),%rax
9237 255,224, //jmpq *%rax
9238};
9239
9240CODE const uint8_t sk_scale_1_float_avx[] = {
9241 72,173, //lods %ds:(%rsi),%rax
9242 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9243 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
9244 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
9245 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
9246 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
9247 72,173, //lods %ds:(%rsi),%rax
9248 255,224, //jmpq *%rax
9249};
9250
9251CODE const uint8_t sk_scale_u8_avx[] = {
9252 73,137,200, //mov %rcx,%r8
9253 72,173, //lods %ds:(%rsi),%rax
9254 72,139,0, //mov (%rax),%rax
9255 72,1,248, //add %rdi,%rax
9256 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05009257 117,80, //jne 639 <_sk_scale_u8_avx+0x60>
Mike Klein64b97482017-03-14 17:35:04 -07009258 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05009259 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
9260 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
9261 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
9262 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
9263 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009264 184,129,128,128,59, //mov $0x3b808081,%eax
9265 197,121,110,200, //vmovd %eax,%xmm9
9266 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
9267 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05009268 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
9269 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
9270 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
9271 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
9272 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
9273 72,173, //lods %ds:(%rsi),%rax
9274 76,137,193, //mov %r8,%rcx
9275 255,224, //jmpq *%rax
9276 49,201, //xor %ecx,%ecx
9277 77,137,194, //mov %r8,%r10
9278 69,49,201, //xor %r9d,%r9d
9279 68,15,182,24, //movzbl (%rax),%r11d
9280 72,255,192, //inc %rax
9281 73,211,227, //shl %cl,%r11
9282 77,9,217, //or %r11,%r9
9283 72,131,193,8, //add $0x8,%rcx
9284 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05009285 117,234, //jne 641 <_sk_scale_u8_avx+0x68>
Mike Klein894d5612017-03-07 07:59:52 -05009286 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05009287 235,143, //jmp 5ed <_sk_scale_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05009288};
9289
9290CODE const uint8_t sk_lerp_1_float_avx[] = {
9291 72,173, //lods %ds:(%rsi),%rax
9292 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9293 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
9294 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
9295 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
9296 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
9297 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
9298 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
9299 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
9300 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
9301 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
9302 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
9303 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
9304 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
9305 72,173, //lods %ds:(%rsi),%rax
9306 255,224, //jmpq *%rax
9307};
9308
9309CODE const uint8_t sk_lerp_u8_avx[] = {
9310 73,137,200, //mov %rcx,%r8
9311 72,173, //lods %ds:(%rsi),%rax
9312 72,139,0, //mov (%rax),%rax
9313 72,1,248, //add %rdi,%rax
9314 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05009315 117,116, //jne 721 <_sk_lerp_u8_avx+0x84>
Mike Klein64b97482017-03-14 17:35:04 -07009316 197,122,126,0, //vmovq (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05009317 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
9318 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
9319 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
9320 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
9321 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009322 184,129,128,128,59, //mov $0x3b808081,%eax
9323 197,121,110,200, //vmovd %eax,%xmm9
9324 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
9325 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05009326 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
9327 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
9328 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
9329 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
9330 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
9331 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
9332 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
9333 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
9334 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
9335 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
9336 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
9337 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
9338 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
9339 72,173, //lods %ds:(%rsi),%rax
9340 76,137,193, //mov %r8,%rcx
9341 255,224, //jmpq *%rax
9342 49,201, //xor %ecx,%ecx
9343 77,137,194, //mov %r8,%r10
9344 69,49,201, //xor %r9d,%r9d
9345 68,15,182,24, //movzbl (%rax),%r11d
9346 72,255,192, //inc %rax
9347 73,211,227, //shl %cl,%r11
9348 77,9,217, //or %r11,%r9
9349 72,131,193,8, //add $0x8,%rcx
9350 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05009351 117,234, //jne 729 <_sk_lerp_u8_avx+0x8c>
Mike Klein894d5612017-03-07 07:59:52 -05009352 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05009353 233,104,255,255,255, //jmpq 6b1 <_sk_lerp_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05009354};
9355
9356CODE const uint8_t sk_lerp_565_avx[] = {
9357 72,173, //lods %ds:(%rsi),%rax
9358 76,139,16, //mov (%rax),%r10
9359 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05009360 15,133,250,0,0,0, //jne 851 <_sk_lerp_565_avx+0x108>
Mike Klein894d5612017-03-07 07:59:52 -05009361 196,65,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm8
9362 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
9363 197,185,105,219, //vpunpckhwd %xmm3,%xmm8,%xmm3
9364 196,66,121,51,192, //vpmovzxwd %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05009365 196,99,61,24,195,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm8
9366 184,0,248,0,0, //mov $0xf800,%eax
9367 197,249,110,216, //vmovd %eax,%xmm3
9368 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
9369 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9370 196,193,100,84,216, //vandps %ymm8,%ymm3,%ymm3
9371 197,124,91,203, //vcvtdq2ps %ymm3,%ymm9
9372 184,8,33,132,55, //mov $0x37842108,%eax
9373 197,249,110,216, //vmovd %eax,%xmm3
9374 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9375 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9376 197,52,89,203, //vmulps %ymm3,%ymm9,%ymm9
9377 184,224,7,0,0, //mov $0x7e0,%eax
9378 197,249,110,216, //vmovd %eax,%xmm3
9379 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
9380 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9381 196,193,100,84,216, //vandps %ymm8,%ymm3,%ymm3
9382 197,124,91,211, //vcvtdq2ps %ymm3,%ymm10
9383 184,33,8,2,58, //mov $0x3a020821,%eax
9384 197,249,110,216, //vmovd %eax,%xmm3
9385 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9386 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9387 197,44,89,211, //vmulps %ymm3,%ymm10,%ymm10
9388 184,31,0,0,0, //mov $0x1f,%eax
9389 197,249,110,216, //vmovd %eax,%xmm3
9390 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
9391 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9392 196,193,100,84,216, //vandps %ymm8,%ymm3,%ymm3
9393 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
9394 184,8,33,4,61, //mov $0x3d042108,%eax
9395 197,249,110,216, //vmovd %eax,%xmm3
9396 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9397 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9398 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05009399 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05009400 196,193,124,89,193, //vmulps %ymm9,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05009401 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
9402 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05009403 196,193,116,89,202, //vmulps %ymm10,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05009404 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
9405 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
9406 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
9407 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009408 184,0,0,128,63, //mov $0x3f800000,%eax
9409 197,249,110,216, //vmovd %eax,%xmm3
9410 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9411 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05009412 72,173, //lods %ds:(%rsi),%rax
9413 255,224, //jmpq *%rax
9414 65,137,200, //mov %ecx,%r8d
9415 65,128,224,7, //and $0x7,%r8b
9416 196,65,57,239,192, //vpxor %xmm8,%xmm8,%xmm8
9417 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05009418 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07009419 15,135,243,254,255,255, //ja 75d <_sk_lerp_565_avx+0x14>
9420 69,15,182,192, //movzbl %r8b,%r8d
Mike Klein5224f462017-03-07 17:29:54 -05009421 76,141,13,75,0,0,0, //lea 0x4b(%rip),%r9 # 8c0 <_sk_lerp_565_avx+0x177>
Mike Klein894d5612017-03-07 07:59:52 -05009422 75,99,4,129, //movslq (%r9,%r8,4),%rax
9423 76,1,200, //add %r9,%rax
9424 255,224, //jmpq *%rax
9425 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
9426 196,65,97,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm8
9427 196,65,57,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm8,%xmm8
9428 196,65,57,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm8,%xmm8
9429 196,65,57,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm8,%xmm8
9430 196,65,57,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
9431 196,65,57,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
9432 196,65,57,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -05009433 233,159,254,255,255, //jmpq 75d <_sk_lerp_565_avx+0x14>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009434 102,144, //xchg %ax,%ax
9435 242,255, //repnz (bad)
Mike Klein894d5612017-03-07 07:59:52 -05009436 255, //(bad)
9437 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009438 234, //(bad)
Mike Klein894d5612017-03-07 07:59:52 -05009439 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009440 255, //(bad)
9441 255,226, //jmpq *%rdx
Mike Klein894d5612017-03-07 07:59:52 -05009442 255, //(bad)
9443 255, //(bad)
9444 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009445 218,255, //(bad)
Mike Klein894d5612017-03-07 07:59:52 -05009446 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009447 255,210, //callq *%rdx
Mike Klein894d5612017-03-07 07:59:52 -05009448 255, //(bad)
9449 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009450 255,202, //dec %edx
Mike Klein894d5612017-03-07 07:59:52 -05009451 255, //(bad)
9452 255, //(bad)
9453 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009454 190, //.byte 0xbe
Mike Klein894d5612017-03-07 07:59:52 -05009455 255, //(bad)
9456 255, //(bad)
9457 255, //.byte 0xff
9458};
9459
9460CODE const uint8_t sk_load_tables_avx[] = {
9461 85, //push %rbp
9462 65,87, //push %r15
9463 65,86, //push %r14
9464 65,85, //push %r13
9465 65,84, //push %r12
9466 83, //push %rbx
9467 72,173, //lods %ds:(%rsi),%rax
9468 76,139,0, //mov (%rax),%r8
9469 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05009470 15,133,56,2,0,0, //jne b2c <_sk_load_tables_avx+0x250>
Mike Klein894d5612017-03-07 07:59:52 -05009471 196,65,124,16,4,184, //vmovups (%r8,%rdi,4),%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05009472 187,255,0,0,0, //mov $0xff,%ebx
9473 197,249,110,195, //vmovd %ebx,%xmm0
9474 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
9475 196,99,125,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05009476 196,193,52,84,192, //vandps %ymm8,%ymm9,%ymm0
9477 196,193,249,126,193, //vmovq %xmm0,%r9
9478 69,137,203, //mov %r9d,%r11d
9479 196,195,249,22,194,1, //vpextrq $0x1,%xmm0,%r10
9480 69,137,214, //mov %r10d,%r14d
9481 73,193,234,32, //shr $0x20,%r10
9482 73,193,233,32, //shr $0x20,%r9
9483 196,227,125,25,192,1, //vextractf128 $0x1,%ymm0,%xmm0
9484 196,193,249,126,196, //vmovq %xmm0,%r12
9485 69,137,231, //mov %r12d,%r15d
9486 196,227,249,22,195,1, //vpextrq $0x1,%xmm0,%rbx
9487 65,137,221, //mov %ebx,%r13d
9488 72,193,235,32, //shr $0x20,%rbx
9489 73,193,236,32, //shr $0x20,%r12
9490 72,139,104,8, //mov 0x8(%rax),%rbp
9491 76,139,64,16, //mov 0x10(%rax),%r8
9492 196,161,122,16,68,189,0, //vmovss 0x0(%rbp,%r15,4),%xmm0
9493 196,163,121,33,68,165,0,16, //vinsertps $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05009494 196,161,122,16,76,173,0, //vmovss 0x0(%rbp,%r13,4),%xmm1
9495 196,227,121,33,193,32, //vinsertps $0x20,%xmm1,%xmm0,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05009496 197,250,16,76,157,0, //vmovss 0x0(%rbp,%rbx,4),%xmm1
9497 196,227,121,33,193,48, //vinsertps $0x30,%xmm1,%xmm0,%xmm0
9498 196,161,122,16,76,157,0, //vmovss 0x0(%rbp,%r11,4),%xmm1
9499 196,163,113,33,76,141,0,16, //vinsertps $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -05009500 196,161,122,16,92,181,0, //vmovss 0x0(%rbp,%r14,4),%xmm3
9501 196,227,113,33,203,32, //vinsertps $0x20,%xmm3,%xmm1,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05009502 196,161,122,16,92,149,0, //vmovss 0x0(%rbp,%r10,4),%xmm3
9503 196,227,113,33,203,48, //vinsertps $0x30,%xmm3,%xmm1,%xmm1
9504 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
9505 196,193,113,114,208,8, //vpsrld $0x8,%xmm8,%xmm1
9506 196,67,125,25,194,1, //vextractf128 $0x1,%ymm8,%xmm10
9507 196,193,105,114,210,8, //vpsrld $0x8,%xmm10,%xmm2
9508 196,227,117,24,202,1, //vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
9509 197,180,84,201, //vandps %ymm1,%ymm9,%ymm1
9510 196,193,249,126,201, //vmovq %xmm1,%r9
9511 69,137,203, //mov %r9d,%r11d
9512 196,195,249,22,202,1, //vpextrq $0x1,%xmm1,%r10
9513 69,137,214, //mov %r10d,%r14d
9514 73,193,234,32, //shr $0x20,%r10
9515 73,193,233,32, //shr $0x20,%r9
9516 196,227,125,25,201,1, //vextractf128 $0x1,%ymm1,%xmm1
9517 196,225,249,126,205, //vmovq %xmm1,%rbp
9518 65,137,239, //mov %ebp,%r15d
9519 196,227,249,22,203,1, //vpextrq $0x1,%xmm1,%rbx
9520 65,137,220, //mov %ebx,%r12d
9521 72,193,235,32, //shr $0x20,%rbx
9522 72,193,237,32, //shr $0x20,%rbp
9523 196,129,122,16,12,184, //vmovss (%r8,%r15,4),%xmm1
9524 196,195,113,33,12,168,16, //vinsertps $0x10,(%r8,%rbp,4),%xmm1,%xmm1
9525 196,129,122,16,20,160, //vmovss (%r8,%r12,4),%xmm2
9526 196,227,113,33,202,32, //vinsertps $0x20,%xmm2,%xmm1,%xmm1
9527 196,193,122,16,20,152, //vmovss (%r8,%rbx,4),%xmm2
9528 196,227,113,33,202,48, //vinsertps $0x30,%xmm2,%xmm1,%xmm1
9529 196,129,122,16,20,152, //vmovss (%r8,%r11,4),%xmm2
9530 196,131,105,33,20,136,16, //vinsertps $0x10,(%r8,%r9,4),%xmm2,%xmm2
9531 196,129,122,16,28,176, //vmovss (%r8,%r14,4),%xmm3
9532 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
9533 196,129,122,16,28,144, //vmovss (%r8,%r10,4),%xmm3
9534 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
9535 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
9536 72,139,64,24, //mov 0x18(%rax),%rax
9537 196,193,105,114,208,16, //vpsrld $0x10,%xmm8,%xmm2
9538 196,193,97,114,210,16, //vpsrld $0x10,%xmm10,%xmm3
9539 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
9540 197,180,84,210, //vandps %ymm2,%ymm9,%ymm2
9541 196,193,249,126,208, //vmovq %xmm2,%r8
9542 69,137,194, //mov %r8d,%r10d
9543 196,195,249,22,209,1, //vpextrq $0x1,%xmm2,%r9
9544 69,137,203, //mov %r9d,%r11d
9545 73,193,233,32, //shr $0x20,%r9
9546 73,193,232,32, //shr $0x20,%r8
9547 196,227,125,25,210,1, //vextractf128 $0x1,%ymm2,%xmm2
9548 196,225,249,126,213, //vmovq %xmm2,%rbp
9549 65,137,238, //mov %ebp,%r14d
9550 196,227,249,22,211,1, //vpextrq $0x1,%xmm2,%rbx
9551 65,137,223, //mov %ebx,%r15d
9552 72,193,235,32, //shr $0x20,%rbx
9553 72,193,237,32, //shr $0x20,%rbp
9554 196,161,122,16,20,176, //vmovss (%rax,%r14,4),%xmm2
9555 196,227,105,33,20,168,16, //vinsertps $0x10,(%rax,%rbp,4),%xmm2,%xmm2
9556 196,161,122,16,28,184, //vmovss (%rax,%r15,4),%xmm3
9557 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
9558 197,250,16,28,152, //vmovss (%rax,%rbx,4),%xmm3
9559 196,99,105,33,203,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm9
9560 196,161,122,16,28,144, //vmovss (%rax,%r10,4),%xmm3
9561 196,163,97,33,28,128,16, //vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
9562 196,161,122,16,20,152, //vmovss (%rax,%r11,4),%xmm2
9563 196,227,97,33,210,32, //vinsertps $0x20,%xmm2,%xmm3,%xmm2
9564 196,161,122,16,28,136, //vmovss (%rax,%r9,4),%xmm3
9565 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
9566 196,195,109,24,209,1, //vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
9567 196,193,57,114,208,24, //vpsrld $0x18,%xmm8,%xmm8
9568 196,193,97,114,210,24, //vpsrld $0x18,%xmm10,%xmm3
9569 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
Mike Klein5224f462017-03-07 17:29:54 -05009570 197,124,91,195, //vcvtdq2ps %ymm3,%ymm8
9571 184,129,128,128,59, //mov $0x3b808081,%eax
9572 197,249,110,216, //vmovd %eax,%xmm3
9573 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9574 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9575 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05009576 72,173, //lods %ds:(%rsi),%rax
9577 91, //pop %rbx
9578 65,92, //pop %r12
9579 65,93, //pop %r13
9580 65,94, //pop %r14
9581 65,95, //pop %r15
9582 93, //pop %rbp
9583 255,224, //jmpq *%rax
Mike Klein5224f462017-03-07 17:29:54 -05009584 137,203, //mov %ecx,%ebx
9585 128,227,7, //and $0x7,%bl
Mike Klein894d5612017-03-07 07:59:52 -05009586 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05009587 254,203, //dec %bl
Mike Klein5224f462017-03-07 17:29:54 -05009588 128,251,6, //cmp $0x6,%bl
Mike Klein64b97482017-03-14 17:35:04 -07009589 15,135,185,253,255,255, //ja 8fa <_sk_load_tables_avx+0x1e>
9590 15,182,219, //movzbl %bl,%ebx
Mike Klein5224f462017-03-07 17:29:54 -05009591 76,141,13,137,0,0,0, //lea 0x89(%rip),%r9 # bd4 <_sk_load_tables_avx+0x2f8>
9592 73,99,28,153, //movslq (%r9,%rbx,4),%rbx
9593 76,1,203, //add %r9,%rbx
9594 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05009595 196,193,121,110,68,184,24, //vmovd 0x18(%r8,%rdi,4),%xmm0
9596 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
9597 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
9598 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9599 196,99,117,12,192,64, //vblendps $0x40,%ymm0,%ymm1,%ymm8
9600 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
9601 196,195,121,34,68,184,20,1, //vpinsrd $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
9602 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
9603 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
9604 196,195,121,34,68,184,16,0, //vpinsrd $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
9605 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
9606 196,195,57,34,68,184,12,3, //vpinsrd $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
9607 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
9608 196,195,57,34,68,184,8,2, //vpinsrd $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
9609 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
9610 196,195,57,34,68,184,4,1, //vpinsrd $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
9611 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
9612 196,195,57,34,4,184,0, //vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
9613 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
Mike Klein5224f462017-03-07 17:29:54 -05009614 233,38,253,255,255, //jmpq 8fa <_sk_load_tables_avx+0x1e>
Mike Klein894d5612017-03-07 07:59:52 -05009615 238, //out %al,(%dx)
9616 255, //(bad)
9617 255, //(bad)
9618 255,224, //jmpq *%rax
9619 255, //(bad)
9620 255, //(bad)
9621 255,210, //callq *%rdx
9622 255, //(bad)
9623 255, //(bad)
9624 255,196, //inc %esp
9625 255, //(bad)
9626 255, //(bad)
9627 255,176,255,255,255,156, //pushq -0x63000001(%rax)
9628 255, //(bad)
9629 255, //(bad)
9630 255, //.byte 0xff
9631 128,255,255, //cmp $0xff,%bh
9632 255, //.byte 0xff
9633};
9634
9635CODE const uint8_t sk_load_a8_avx[] = {
9636 73,137,200, //mov %rcx,%r8
9637 72,173, //lods %ds:(%rsi),%rax
9638 72,139,0, //mov (%rax),%rax
9639 72,1,248, //add %rdi,%rax
9640 77,133,192, //test %r8,%r8
Mike Klein5224f462017-03-07 17:29:54 -05009641 117,74, //jne c4a <_sk_load_a8_avx+0x5a>
Mike Klein64b97482017-03-14 17:35:04 -07009642 197,250,126,0, //vmovq (%rax),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05009643 196,226,121,49,200, //vpmovzxbd %xmm0,%xmm1
9644 196,227,121,4,192,229, //vpermilps $0xe5,%xmm0,%xmm0
9645 196,226,121,49,192, //vpmovzxbd %xmm0,%xmm0
9646 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
9647 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05009648 184,129,128,128,59, //mov $0x3b808081,%eax
9649 197,249,110,200, //vmovd %eax,%xmm1
9650 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
9651 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05009652 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
9653 72,173, //lods %ds:(%rsi),%rax
9654 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
9655 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9656 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
9657 76,137,193, //mov %r8,%rcx
9658 255,224, //jmpq *%rax
9659 49,201, //xor %ecx,%ecx
9660 77,137,194, //mov %r8,%r10
9661 69,49,201, //xor %r9d,%r9d
9662 68,15,182,24, //movzbl (%rax),%r11d
9663 72,255,192, //inc %rax
9664 73,211,227, //shl %cl,%r11
9665 77,9,217, //or %r11,%r9
9666 72,131,193,8, //add $0x8,%rcx
9667 73,255,202, //dec %r10
Mike Klein5224f462017-03-07 17:29:54 -05009668 117,234, //jne c52 <_sk_load_a8_avx+0x62>
Mike Klein894d5612017-03-07 07:59:52 -05009669 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -05009670 235,149, //jmp c04 <_sk_load_a8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05009671};
9672
9673CODE const uint8_t sk_store_a8_avx[] = {
9674 72,173, //lods %ds:(%rsi),%rax
9675 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05009676 184,0,0,127,67, //mov $0x437f0000,%eax
9677 197,121,110,192, //vmovd %eax,%xmm8
9678 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9679 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009680 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
9681 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
9682 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9683 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
9684 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
9685 72,133,201, //test %rcx,%rcx
Mike Klein5224f462017-03-07 17:29:54 -05009686 117,10, //jne cb1 <_sk_store_a8_avx+0x42>
Mike Klein894d5612017-03-07 07:59:52 -05009687 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
9688 72,173, //lods %ds:(%rsi),%rax
9689 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07009690 65,137,200, //mov %ecx,%r8d
9691 65,128,224,7, //and $0x7,%r8b
9692 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05009693 65,128,248,6, //cmp $0x6,%r8b
Mike Klein5224f462017-03-07 17:29:54 -05009694 119,236, //ja cad <_sk_store_a8_avx+0x3e>
Mike Klein894d5612017-03-07 07:59:52 -05009695 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -07009696 65,15,182,192, //movzbl %r8b,%eax
9697 76,141,5,67,0,0,0, //lea 0x43(%rip),%r8 # d14 <_sk_store_a8_avx+0xa5>
9698 73,99,4,128, //movslq (%r8,%rax,4),%rax
9699 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05009700 255,224, //jmpq *%rax
9701 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
9702 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
9703 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
9704 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
9705 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
9706 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
9707 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Klein64b97482017-03-14 17:35:04 -07009708 235,154, //jmp cad <_sk_store_a8_avx+0x3e>
Mike Klein5224f462017-03-07 17:29:54 -05009709 144, //nop
9710 246,255, //idiv %bh
9711 255, //(bad)
9712 255, //(bad)
9713 238, //out %al,(%dx)
9714 255, //(bad)
9715 255, //(bad)
9716 255,230, //jmpq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05009717 255, //(bad)
9718 255, //(bad)
9719 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05009720 222,255, //fdivrp %st,%st(7)
9721 255, //(bad)
9722 255,214, //callq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05009723 255, //(bad)
9724 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05009725 255,206, //dec %esi
Mike Klein894d5612017-03-07 07:59:52 -05009726 255, //(bad)
9727 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05009728 255,198, //inc %esi
Mike Klein894d5612017-03-07 07:59:52 -05009729 255, //(bad)
9730 255, //(bad)
9731 255, //.byte 0xff
9732};
9733
9734CODE const uint8_t sk_load_565_avx[] = {
9735 72,173, //lods %ds:(%rsi),%rax
9736 76,139,16, //mov (%rax),%r10
9737 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07009738 15,133,209,0,0,0, //jne e0f <_sk_load_565_avx+0xdf>
Mike Klein894d5612017-03-07 07:59:52 -05009739 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
9740 197,241,239,201, //vpxor %xmm1,%xmm1,%xmm1
9741 197,249,105,201, //vpunpckhwd %xmm1,%xmm0,%xmm1
9742 196,226,121,51,192, //vpmovzxwd %xmm0,%xmm0
9743 196,227,125,24,209,1, //vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05009744 184,0,248,0,0, //mov $0xf800,%eax
9745 197,249,110,192, //vmovd %eax,%xmm0
9746 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
9747 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05009748 197,252,84,194, //vandps %ymm2,%ymm0,%ymm0
9749 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -05009750 184,8,33,132,55, //mov $0x37842108,%eax
9751 197,249,110,200, //vmovd %eax,%xmm1
9752 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
9753 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
9754 197,252,89,193, //vmulps %ymm1,%ymm0,%ymm0
9755 184,224,7,0,0, //mov $0x7e0,%eax
9756 197,249,110,200, //vmovd %eax,%xmm1
9757 197,249,112,201,0, //vpshufd $0x0,%xmm1,%xmm1
9758 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05009759 197,244,84,202, //vandps %ymm2,%ymm1,%ymm1
9760 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Klein5224f462017-03-07 17:29:54 -05009761 184,33,8,2,58, //mov $0x3a020821,%eax
9762 197,249,110,216, //vmovd %eax,%xmm3
9763 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9764 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9765 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
9766 184,31,0,0,0, //mov $0x1f,%eax
9767 197,249,110,216, //vmovd %eax,%xmm3
9768 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
9769 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05009770 197,228,84,210, //vandps %ymm2,%ymm3,%ymm2
9771 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Klein5224f462017-03-07 17:29:54 -05009772 184,8,33,4,61, //mov $0x3d042108,%eax
9773 197,249,110,216, //vmovd %eax,%xmm3
9774 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9775 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
9776 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
9777 184,0,0,128,63, //mov $0x3f800000,%eax
9778 197,249,110,216, //vmovd %eax,%xmm3
9779 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9780 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05009781 72,173, //lods %ds:(%rsi),%rax
9782 255,224, //jmpq *%rax
9783 65,137,200, //mov %ecx,%r8d
9784 65,128,224,7, //and $0x7,%r8b
9785 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
9786 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05009787 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07009788 15,135,29,255,255,255, //ja d44 <_sk_load_565_avx+0x14>
9789 69,15,182,192, //movzbl %r8b,%r8d
9790 76,141,13,74,0,0,0, //lea 0x4a(%rip),%r9 # e7c <_sk_load_565_avx+0x14c>
Mike Klein894d5612017-03-07 07:59:52 -05009791 75,99,4,129, //movslq (%r9,%r8,4),%rax
9792 76,1,200, //add %r9,%rax
9793 255,224, //jmpq *%rax
9794 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
9795 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
9796 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
9797 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
9798 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
9799 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
9800 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
9801 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -07009802 233,201,254,255,255, //jmpq d44 <_sk_load_565_avx+0x14>
Mike Klein5224f462017-03-07 17:29:54 -05009803 144, //nop
9804 243,255, //repz (bad)
9805 255, //(bad)
9806 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009807 235,255, //jmp e81 <_sk_load_565_avx+0x151>
Mike Klein5224f462017-03-07 17:29:54 -05009808 255, //(bad)
9809 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05009810 255, //(bad)
9811 255, //(bad)
9812 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05009813 219,255, //(bad)
9814 255, //(bad)
9815 255,211, //callq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05009816 255, //(bad)
9817 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05009818 255,203, //dec %ebx
Mike Klein894d5612017-03-07 07:59:52 -05009819 255, //(bad)
9820 255, //(bad)
9821 255, //(bad)
Mike Klein5224f462017-03-07 17:29:54 -05009822 191, //.byte 0xbf
Mike Klein894d5612017-03-07 07:59:52 -05009823 255, //(bad)
9824 255, //(bad)
9825 255, //.byte 0xff
9826};
9827
9828CODE const uint8_t sk_store_565_avx[] = {
9829 72,173, //lods %ds:(%rsi),%rax
9830 76,139,8, //mov (%rax),%r9
Mike Klein5224f462017-03-07 17:29:54 -05009831 184,0,0,248,65, //mov $0x41f80000,%eax
9832 197,121,110,192, //vmovd %eax,%xmm8
9833 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9834 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009835 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
9836 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
9837 196,193,41,114,241,11, //vpslld $0xb,%xmm9,%xmm10
9838 196,67,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm9
9839 196,193,49,114,241,11, //vpslld $0xb,%xmm9,%xmm9
9840 196,67,45,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
Mike Klein5224f462017-03-07 17:29:54 -05009841 184,0,0,124,66, //mov $0x427c0000,%eax
9842 197,121,110,208, //vmovd %eax,%xmm10
9843 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
9844 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05009845 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
9846 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
9847 196,193,33,114,242,5, //vpslld $0x5,%xmm10,%xmm11
9848 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
9849 196,193,41,114,242,5, //vpslld $0x5,%xmm10,%xmm10
9850 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
9851 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
9852 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
9853 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
9854 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
9855 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9856 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
9857 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07009858 117,10, //jne f36 <_sk_store_565_avx+0x9e>
Mike Klein894d5612017-03-07 07:59:52 -05009859 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
9860 72,173, //lods %ds:(%rsi),%rax
9861 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -07009862 65,137,200, //mov %ecx,%r8d
9863 65,128,224,7, //and $0x7,%r8b
9864 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05009865 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07009866 119,236, //ja f32 <_sk_store_565_avx+0x9a>
9867 65,15,182,192, //movzbl %r8b,%eax
9868 76,141,5,67,0,0,0, //lea 0x43(%rip),%r8 # f94 <_sk_store_565_avx+0xfc>
9869 73,99,4,128, //movslq (%r8,%rax,4),%rax
9870 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -05009871 255,224, //jmpq *%rax
9872 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
9873 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
9874 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
9875 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
9876 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
9877 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
Mike Klein64b97482017-03-14 17:35:04 -07009878 196,67,121,21,4,121,0, //vpextrw $0x0,%xmm8,(%r9,%rdi,2)
9879 235,159, //jmp f32 <_sk_store_565_avx+0x9a>
9880 144, //nop
9881 246,255, //idiv %bh
Mike Klein894d5612017-03-07 07:59:52 -05009882 255, //(bad)
9883 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009884 238, //out %al,(%dx)
Mike Klein894d5612017-03-07 07:59:52 -05009885 255, //(bad)
9886 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009887 255,230, //jmpq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05009888 255, //(bad)
9889 255, //(bad)
9890 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009891 222,255, //fdivrp %st,%st(7)
Mike Klein894d5612017-03-07 07:59:52 -05009892 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009893 255,214, //callq *%rsi
Mike Klein894d5612017-03-07 07:59:52 -05009894 255, //(bad)
9895 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009896 255,206, //dec %esi
Mike Klein894d5612017-03-07 07:59:52 -05009897 255, //(bad)
9898 255, //(bad)
Mike Klein64b97482017-03-14 17:35:04 -07009899 255,198, //inc %esi
Mike Klein894d5612017-03-07 07:59:52 -05009900 255, //(bad)
9901 255, //(bad)
9902 255, //.byte 0xff
9903};
9904
9905CODE const uint8_t sk_load_8888_avx[] = {
9906 72,173, //lods %ds:(%rsi),%rax
9907 76,139,16, //mov (%rax),%r10
9908 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -07009909 15,133,157,0,0,0, //jne 105b <_sk_load_8888_avx+0xab>
Mike Klein894d5612017-03-07 07:59:52 -05009910 196,65,124,16,12,186, //vmovups (%r10,%rdi,4),%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009911 184,255,0,0,0, //mov $0xff,%eax
9912 197,249,110,192, //vmovd %eax,%xmm0
9913 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
9914 196,99,125,24,216,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm11
Mike Klein894d5612017-03-07 07:59:52 -05009915 196,193,36,84,193, //vandps %ymm9,%ymm11,%ymm0
9916 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009917 184,129,128,128,59, //mov $0x3b808081,%eax
9918 197,249,110,200, //vmovd %eax,%xmm1
9919 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
9920 196,99,117,24,193,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm8
9921 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05009922 196,193,41,114,209,8, //vpsrld $0x8,%xmm9,%xmm10
9923 196,99,125,25,203,1, //vextractf128 $0x1,%ymm9,%xmm3
9924 197,241,114,211,8, //vpsrld $0x8,%xmm3,%xmm1
9925 196,227,45,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm10,%ymm1
9926 197,164,84,201, //vandps %ymm1,%ymm11,%ymm1
9927 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009928 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05009929 196,193,41,114,209,16, //vpsrld $0x10,%xmm9,%xmm10
9930 197,233,114,211,16, //vpsrld $0x10,%xmm3,%xmm2
9931 196,227,45,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm10,%ymm2
9932 197,164,84,210, //vandps %ymm2,%ymm11,%ymm2
9933 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009934 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05009935 196,193,49,114,209,24, //vpsrld $0x18,%xmm9,%xmm9
9936 197,225,114,211,24, //vpsrld $0x18,%xmm3,%xmm3
9937 196,227,53,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
9938 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
9939 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
9940 72,173, //lods %ds:(%rsi),%rax
9941 255,224, //jmpq *%rax
9942 65,137,200, //mov %ecx,%r8d
9943 65,128,224,7, //and $0x7,%r8b
9944 196,65,52,87,201, //vxorps %ymm9,%ymm9,%ymm9
9945 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -05009946 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -07009947 15,135,80,255,255,255, //ja fc4 <_sk_load_8888_avx+0x14>
9948 69,15,182,192, //movzbl %r8b,%r8d
9949 76,141,13,137,0,0,0, //lea 0x89(%rip),%r9 # 1108 <_sk_load_8888_avx+0x158>
Mike Klein894d5612017-03-07 07:59:52 -05009950 75,99,4,129, //movslq (%r9,%r8,4),%rax
9951 76,1,200, //add %r9,%rax
9952 255,224, //jmpq *%rax
9953 196,193,121,110,68,186,24, //vmovd 0x18(%r10,%rdi,4),%xmm0
9954 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
9955 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
9956 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9957 196,99,117,12,200,64, //vblendps $0x40,%ymm0,%ymm1,%ymm9
9958 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
9959 196,195,121,34,68,186,20,1, //vpinsrd $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
9960 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
9961 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
9962 196,195,121,34,68,186,16,0, //vpinsrd $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
9963 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
9964 196,195,49,34,68,186,12,3, //vpinsrd $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
9965 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
9966 196,195,49,34,68,186,8,2, //vpinsrd $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
9967 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
9968 196,195,49,34,68,186,4,1, //vpinsrd $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
9969 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
9970 196,195,49,34,4,186,0, //vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
9971 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
Mike Klein64b97482017-03-14 17:35:04 -07009972 233,188,254,255,255, //jmpq fc4 <_sk_load_8888_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05009973 238, //out %al,(%dx)
9974 255, //(bad)
9975 255, //(bad)
9976 255,224, //jmpq *%rax
9977 255, //(bad)
9978 255, //(bad)
9979 255,210, //callq *%rdx
9980 255, //(bad)
9981 255, //(bad)
9982 255,196, //inc %esp
9983 255, //(bad)
9984 255, //(bad)
9985 255,176,255,255,255,156, //pushq -0x63000001(%rax)
9986 255, //(bad)
9987 255, //(bad)
9988 255, //.byte 0xff
9989 128,255,255, //cmp $0xff,%bh
9990 255, //.byte 0xff
9991};
9992
9993CODE const uint8_t sk_store_8888_avx[] = {
9994 72,173, //lods %ds:(%rsi),%rax
9995 76,139,8, //mov (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009996 184,0,0,127,67, //mov $0x437f0000,%eax
9997 197,121,110,192, //vmovd %eax,%xmm8
9998 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9999 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -050010000 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
10001 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
10002 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
10003 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
10004 196,193,33,114,242,8, //vpslld $0x8,%xmm10,%xmm11
10005 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
10006 196,193,41,114,242,8, //vpslld $0x8,%xmm10,%xmm10
10007 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
10008 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
10009 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
10010 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
10011 196,193,33,114,242,16, //vpslld $0x10,%xmm10,%xmm11
10012 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
10013 196,193,41,114,242,16, //vpslld $0x10,%xmm10,%xmm10
10014 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
10015 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
10016 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
10017 196,193,33,114,240,24, //vpslld $0x18,%xmm8,%xmm11
10018 196,67,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm8
10019 196,193,57,114,240,24, //vpslld $0x18,%xmm8,%xmm8
10020 196,67,37,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm11,%ymm8
10021 196,65,45,86,192, //vorpd %ymm8,%ymm10,%ymm8
10022 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
10023 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010024 117,10, //jne 11c8 <_sk_store_8888_avx+0xa4>
Mike Klein894d5612017-03-07 07:59:52 -050010025 196,65,124,17,4,185, //vmovups %ymm8,(%r9,%rdi,4)
10026 72,173, //lods %ds:(%rsi),%rax
10027 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -070010028 65,137,200, //mov %ecx,%r8d
10029 65,128,224,7, //and $0x7,%r8b
10030 65,254,200, //dec %r8b
Mike Klein894d5612017-03-07 07:59:52 -050010031 65,128,248,6, //cmp $0x6,%r8b
Mike Klein64b97482017-03-14 17:35:04 -070010032 119,236, //ja 11c4 <_sk_store_8888_avx+0xa0>
10033 65,15,182,192, //movzbl %r8b,%eax
10034 76,141,5,85,0,0,0, //lea 0x55(%rip),%r8 # 1238 <_sk_store_8888_avx+0x114>
10035 73,99,4,128, //movslq (%r8,%rax,4),%rax
10036 76,1,192, //add %r8,%rax
Mike Klein894d5612017-03-07 07:59:52 -050010037 255,224, //jmpq *%rax
10038 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
10039 196,67,121,22,76,185,24,2, //vpextrd $0x2,%xmm9,0x18(%r9,%rdi,4)
10040 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
10041 196,67,121,22,76,185,20,1, //vpextrd $0x1,%xmm9,0x14(%r9,%rdi,4)
10042 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -070010043 196,65,122,17,76,185,16, //vmovss %xmm9,0x10(%r9,%rdi,4)
Mike Klein894d5612017-03-07 07:59:52 -050010044 196,67,121,22,68,185,12,3, //vpextrd $0x3,%xmm8,0xc(%r9,%rdi,4)
10045 196,67,121,22,68,185,8,2, //vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
10046 196,67,121,22,68,185,4,1, //vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
10047 196,65,121,126,4,185, //vmovd %xmm8,(%r9,%rdi,4)
Mike Klein64b97482017-03-14 17:35:04 -070010048 235,143, //jmp 11c4 <_sk_store_8888_avx+0xa0>
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010049 15,31,0, //nopl (%rax)
10050 245, //cmc
Mike Klein894d5612017-03-07 07:59:52 -050010051 255, //(bad)
10052 255, //(bad)
10053 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010054 237, //in (%dx),%eax
Mike Klein894d5612017-03-07 07:59:52 -050010055 255, //(bad)
10056 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010057 255,229, //jmpq *%rbp
10058 255, //(bad)
10059 255, //(bad)
10060 255, //(bad)
10061 221,255, //(bad)
10062 255, //(bad)
10063 255,208, //callq *%rax
10064 255, //(bad)
10065 255, //(bad)
10066 255,194, //inc %edx
Mike Klein894d5612017-03-07 07:59:52 -050010067 255, //(bad)
10068 255, //(bad)
10069 255, //.byte 0xff
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010070 180,255, //mov $0xff,%ah
Mike Klein894d5612017-03-07 07:59:52 -050010071 255, //(bad)
10072 255, //.byte 0xff
10073};
10074
10075CODE const uint8_t sk_load_f16_avx[] = {
10076 72,173, //lods %ds:(%rsi),%rax
10077 72,139,0, //mov (%rax),%rax
10078 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010079 15,133,2,1,0,0, //jne 1364 <_sk_load_f16_avx+0x110>
10080 197,121,16,4,248, //vmovupd (%rax,%rdi,8),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010081 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
10082 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
Mike Klein64b97482017-03-14 17:35:04 -070010083 197,122,111,76,248,48, //vmovdqu 0x30(%rax,%rdi,8),%xmm9
10084 197,185,97,194, //vpunpcklwd %xmm2,%xmm8,%xmm0
10085 197,185,105,210, //vpunpckhwd %xmm2,%xmm8,%xmm2
10086 196,193,97,97,201, //vpunpcklwd %xmm9,%xmm3,%xmm1
10087 196,193,97,105,217, //vpunpckhwd %xmm9,%xmm3,%xmm3
10088 197,121,97,194, //vpunpcklwd %xmm2,%xmm0,%xmm8
10089 197,249,105,194, //vpunpckhwd %xmm2,%xmm0,%xmm0
10090 197,241,97,211, //vpunpcklwd %xmm3,%xmm1,%xmm2
10091 197,113,105,203, //vpunpckhwd %xmm3,%xmm1,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050010092 184,0,4,0,4, //mov $0x4000400,%eax
10093 197,249,110,216, //vmovd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050010094 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -070010095 196,193,97,101,200, //vpcmpgtw %xmm8,%xmm3,%xmm1
10096 196,65,113,223,192, //vpandn %xmm8,%xmm1,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050010097 197,225,101,200, //vpcmpgtw %xmm0,%xmm3,%xmm1
10098 197,241,223,192, //vpandn %xmm0,%xmm1,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -070010099 197,225,101,202, //vpcmpgtw %xmm2,%xmm3,%xmm1
10100 197,241,223,202, //vpandn %xmm2,%xmm1,%xmm1
10101 196,193,97,101,209, //vpcmpgtw %xmm9,%xmm3,%xmm2
10102 196,193,105,223,209, //vpandn %xmm9,%xmm2,%xmm2
10103 196,66,121,51,208, //vpmovzxwd %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050010104 196,98,121,51,201, //vpmovzxwd %xmm1,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -070010105 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
10106 197,57,105,195, //vpunpckhwd %xmm3,%xmm8,%xmm8
10107 197,241,105,203, //vpunpckhwd %xmm3,%xmm1,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050010108 196,98,121,51,216, //vpmovzxwd %xmm0,%xmm11
Mike Klein64b97482017-03-14 17:35:04 -070010109 196,98,121,51,226, //vpmovzxwd %xmm2,%xmm12
10110 197,121,105,235, //vpunpckhwd %xmm3,%xmm0,%xmm13
10111 197,105,105,243, //vpunpckhwd %xmm3,%xmm2,%xmm14
10112 196,193,121,114,242,13, //vpslld $0xd,%xmm10,%xmm0
10113 196,193,105,114,241,13, //vpslld $0xd,%xmm9,%xmm2
10114 196,227,125,24,194,1, //vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -050010115 184,0,0,128,119, //mov $0x77800000,%eax
Mike Klein64b97482017-03-14 17:35:04 -070010116 197,249,110,208, //vmovd %eax,%xmm2
10117 197,249,112,210,0, //vpshufd $0x0,%xmm2,%xmm2
10118 196,99,109,24,202,1, //vinsertf128 $0x1,%xmm2,%ymm2,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -050010119 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
Mike Klein64b97482017-03-14 17:35:04 -070010120 196,193,105,114,240,13, //vpslld $0xd,%xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010121 197,241,114,241,13, //vpslld $0xd,%xmm1,%xmm1
10122 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
10123 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
Mike Klein64b97482017-03-14 17:35:04 -070010124 196,193,105,114,243,13, //vpslld $0xd,%xmm11,%xmm2
10125 196,193,97,114,244,13, //vpslld $0xd,%xmm12,%xmm3
10126 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -050010127 197,180,89,210, //vmulps %ymm2,%ymm9,%ymm2
10128 196,193,57,114,245,13, //vpslld $0xd,%xmm13,%xmm8
Mike Klein64b97482017-03-14 17:35:04 -070010129 196,193,97,114,246,13, //vpslld $0xd,%xmm14,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050010130 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
10131 197,180,89,219, //vmulps %ymm3,%ymm9,%ymm3
10132 72,173, //lods %ds:(%rsi),%rax
10133 255,224, //jmpq *%rax
Mike Klein64b97482017-03-14 17:35:04 -070010134 197,123,16,4,248, //vmovsd (%rax,%rdi,8),%xmm8
10135 196,65,49,239,201, //vpxor %xmm9,%xmm9,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050010136 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010137 116,79, //je 13c3 <_sk_load_f16_avx+0x16f>
10138 197,57,22,68,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010139 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010140 114,67, //jb 13c3 <_sk_load_f16_avx+0x16f>
Mike Klein894d5612017-03-07 07:59:52 -050010141 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
10142 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010143 116,68, //je 13d0 <_sk_load_f16_avx+0x17c>
Mike Klein894d5612017-03-07 07:59:52 -050010144 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
10145 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010146 114,56, //jb 13d0 <_sk_load_f16_avx+0x17c>
Mike Klein894d5612017-03-07 07:59:52 -050010147 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
10148 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010149 15,132,209,254,255,255, //je 1279 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -050010150 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
10151 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010152 15,130,193,254,255,255, //jb 1279 <_sk_load_f16_avx+0x25>
10153 197,122,126,76,248,48, //vmovq 0x30(%rax,%rdi,8),%xmm9
10154 233,182,254,255,255, //jmpq 1279 <_sk_load_f16_avx+0x25>
10155 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
10156 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
10157 233,169,254,255,255, //jmpq 1279 <_sk_load_f16_avx+0x25>
10158 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
10159 233,160,254,255,255, //jmpq 1279 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -050010160};
10161
10162CODE const uint8_t sk_store_f16_avx[] = {
10163 72,173, //lods %ds:(%rsi),%rax
Mike Klein5224f462017-03-07 17:29:54 -050010164 76,139,0, //mov (%rax),%r8
10165 184,0,0,128,7, //mov $0x7800000,%eax
10166 197,121,110,192, //vmovd %eax,%xmm8
10167 196,65,121,112,192,0, //vpshufd $0x0,%xmm8,%xmm8
10168 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -050010169 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
10170 196,67,125,25,202,1, //vextractf128 $0x1,%ymm9,%xmm10
10171 196,193,41,114,210,13, //vpsrld $0xd,%xmm10,%xmm10
10172 196,193,49,114,209,13, //vpsrld $0xd,%xmm9,%xmm9
10173 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
10174 196,67,125,25,220,1, //vextractf128 $0x1,%ymm11,%xmm12
10175 196,193,25,114,212,13, //vpsrld $0xd,%xmm12,%xmm12
10176 196,193,33,114,211,13, //vpsrld $0xd,%xmm11,%xmm11
10177 197,60,89,234, //vmulps %ymm2,%ymm8,%ymm13
10178 196,67,125,25,238,1, //vextractf128 $0x1,%ymm13,%xmm14
10179 196,193,9,114,214,13, //vpsrld $0xd,%xmm14,%xmm14
10180 196,193,17,114,213,13, //vpsrld $0xd,%xmm13,%xmm13
10181 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
10182 196,67,125,25,199,1, //vextractf128 $0x1,%ymm8,%xmm15
10183 196,193,1,114,215,13, //vpsrld $0xd,%xmm15,%xmm15
10184 196,193,57,114,208,13, //vpsrld $0xd,%xmm8,%xmm8
10185 196,193,33,115,251,2, //vpslldq $0x2,%xmm11,%xmm11
10186 196,65,33,235,201, //vpor %xmm9,%xmm11,%xmm9
10187 196,193,33,115,252,2, //vpslldq $0x2,%xmm12,%xmm11
10188 196,65,33,235,226, //vpor %xmm10,%xmm11,%xmm12
10189 196,193,57,115,248,2, //vpslldq $0x2,%xmm8,%xmm8
10190 196,65,57,235,197, //vpor %xmm13,%xmm8,%xmm8
10191 196,193,41,115,255,2, //vpslldq $0x2,%xmm15,%xmm10
10192 196,65,41,235,238, //vpor %xmm14,%xmm10,%xmm13
10193 196,65,49,98,216, //vpunpckldq %xmm8,%xmm9,%xmm11
10194 196,65,49,106,208, //vpunpckhdq %xmm8,%xmm9,%xmm10
10195 196,65,25,98,205, //vpunpckldq %xmm13,%xmm12,%xmm9
10196 196,65,25,106,197, //vpunpckhdq %xmm13,%xmm12,%xmm8
10197 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010198 117,31, //jne 14af <_sk_store_f16_avx+0xd6>
Mike Klein5224f462017-03-07 17:29:54 -050010199 196,65,120,17,28,248, //vmovups %xmm11,(%r8,%rdi,8)
10200 196,65,120,17,84,248,16, //vmovups %xmm10,0x10(%r8,%rdi,8)
10201 196,65,120,17,76,248,32, //vmovups %xmm9,0x20(%r8,%rdi,8)
10202 196,65,122,127,68,248,48, //vmovdqu %xmm8,0x30(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -050010203 72,173, //lods %ds:(%rsi),%rax
10204 255,224, //jmpq *%rax
Mike Klein5224f462017-03-07 17:29:54 -050010205 196,65,121,214,28,248, //vmovq %xmm11,(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -050010206 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010207 116,240, //je 14ab <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -050010208 196,65,121,23,92,248,8, //vmovhpd %xmm11,0x8(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -050010209 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010210 114,227, //jb 14ab <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -050010211 196,65,121,214,84,248,16, //vmovq %xmm10,0x10(%r8,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -070010212 116,218, //je 14ab <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -050010213 196,65,121,23,84,248,24, //vmovhpd %xmm10,0x18(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -050010214 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010215 114,205, //jb 14ab <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -050010216 196,65,121,214,76,248,32, //vmovq %xmm9,0x20(%r8,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -070010217 116,196, //je 14ab <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -050010218 196,65,121,23,76,248,40, //vmovhpd %xmm9,0x28(%r8,%rdi,8)
Mike Klein894d5612017-03-07 07:59:52 -050010219 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010220 114,183, //jb 14ab <_sk_store_f16_avx+0xd2>
Mike Klein5224f462017-03-07 17:29:54 -050010221 196,65,121,214,68,248,48, //vmovq %xmm8,0x30(%r8,%rdi,8)
Mike Klein64b97482017-03-14 17:35:04 -070010222 235,174, //jmp 14ab <_sk_store_f16_avx+0xd2>
Mike Klein894d5612017-03-07 07:59:52 -050010223};
10224
10225CODE const uint8_t sk_store_f32_avx[] = {
10226 72,173, //lods %ds:(%rsi),%rax
10227 76,139,0, //mov (%rax),%r8
10228 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
10229 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
10230 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
10231 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
10232 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
10233 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
10234 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
10235 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
10236 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
10237 72,133,201, //test %rcx,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010238 117,55, //jne 156a <_sk_store_f32_avx+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -050010239 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
10240 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
10241 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
10242 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
10243 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
10244 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
10245 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
10246 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
10247 72,173, //lods %ds:(%rsi),%rax
10248 255,224, //jmpq *%rax
10249 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
10250 72,131,249,1, //cmp $0x1,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010251 116,240, //je 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010252 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
10253 72,131,249,3, //cmp $0x3,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010254 114,227, //jb 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010255 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -070010256 116,218, //je 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010257 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
10258 72,131,249,5, //cmp $0x5,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010259 114,205, //jb 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010260 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -070010261 116,195, //je 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010262 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
10263 72,131,249,7, //cmp $0x7,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070010264 114,181, //jb 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010265 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Klein64b97482017-03-14 17:35:04 -070010266 235,171, //jmp 1566 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -050010267};
10268
10269CODE const uint8_t sk_clamp_x_avx[] = {
10270 72,173, //lods %ds:(%rsi),%rax
10271 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
10272 197,60,95,200, //vmaxps %ymm0,%ymm8,%ymm9
10273 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10274 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
10275 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
10276 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
10277 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
10278 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
10279 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
10280 72,173, //lods %ds:(%rsi),%rax
10281 255,224, //jmpq *%rax
10282};
10283
10284CODE const uint8_t sk_clamp_y_avx[] = {
10285 72,173, //lods %ds:(%rsi),%rax
10286 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
10287 197,60,95,201, //vmaxps %ymm1,%ymm8,%ymm9
10288 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10289 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
10290 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
10291 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
10292 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
10293 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
10294 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
10295 72,173, //lods %ds:(%rsi),%rax
10296 255,224, //jmpq *%rax
10297};
10298
10299CODE const uint8_t sk_repeat_x_avx[] = {
10300 72,173, //lods %ds:(%rsi),%rax
10301 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10302 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
10303 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
10304 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
10305 196,65,124,92,201, //vsubps %ymm9,%ymm0,%ymm9
10306 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
10307 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
10308 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
10309 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
10310 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
10311 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
10312 72,173, //lods %ds:(%rsi),%rax
10313 255,224, //jmpq *%rax
10314};
10315
10316CODE const uint8_t sk_repeat_y_avx[] = {
10317 72,173, //lods %ds:(%rsi),%rax
10318 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10319 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
10320 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
10321 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
10322 196,65,116,92,201, //vsubps %ymm9,%ymm1,%ymm9
10323 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
10324 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
10325 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
10326 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
10327 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
10328 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
10329 72,173, //lods %ds:(%rsi),%rax
10330 255,224, //jmpq *%rax
10331};
10332
10333CODE const uint8_t sk_mirror_x_avx[] = {
10334 72,173, //lods %ds:(%rsi),%rax
Mike Klein64b97482017-03-14 17:35:04 -070010335 197,121,110,0, //vmovd (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010336 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
10337 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
10338 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
10339 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
10340 196,227,121,4,192,0, //vpermilps $0x0,%xmm0,%xmm0
10341 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
10342 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
10343 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
10344 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
10345 197,172,92,192, //vsubps %ymm0,%ymm10,%ymm0
10346 196,193,124,92,193, //vsubps %ymm9,%ymm0,%ymm0
10347 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
10348 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
10349 197,60,84,192, //vandps %ymm0,%ymm8,%ymm8
10350 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
10351 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
10352 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
10353 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
10354 196,227,53,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
10355 197,188,93,192, //vminps %ymm0,%ymm8,%ymm0
10356 72,173, //lods %ds:(%rsi),%rax
10357 255,224, //jmpq *%rax
10358};
10359
10360CODE const uint8_t sk_mirror_y_avx[] = {
10361 72,173, //lods %ds:(%rsi),%rax
Mike Klein64b97482017-03-14 17:35:04 -070010362 197,121,110,0, //vmovd (%rax),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010363 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
10364 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
10365 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
10366 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
10367 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
10368 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
10369 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
10370 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
10371 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
10372 197,172,92,201, //vsubps %ymm1,%ymm10,%ymm1
10373 196,193,116,92,201, //vsubps %ymm9,%ymm1,%ymm1
10374 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
10375 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
10376 197,60,84,193, //vandps %ymm1,%ymm8,%ymm8
10377 196,99,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm1
10378 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
10379 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
10380 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
10381 196,227,53,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm9,%ymm1
10382 197,188,93,201, //vminps %ymm1,%ymm8,%ymm1
10383 72,173, //lods %ds:(%rsi),%rax
10384 255,224, //jmpq *%rax
10385};
10386
Mike Kleine9ed07d2017-03-07 12:28:11 -050010387CODE const uint8_t sk_luminance_to_alpha_avx[] = {
Mike Klein5224f462017-03-07 17:29:54 -050010388 184,208,179,89,62, //mov $0x3e59b3d0,%eax
10389 197,249,110,216, //vmovd %eax,%xmm3
10390 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
10391 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050010392 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -050010393 184,89,23,55,63, //mov $0x3f371759,%eax
10394 197,249,110,216, //vmovd %eax,%xmm3
10395 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
10396 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050010397 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
10398 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
Mike Klein5224f462017-03-07 17:29:54 -050010399 184,152,221,147,61, //mov $0x3d93dd98,%eax
10400 197,249,110,200, //vmovd %eax,%xmm1
10401 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
10402 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Kleine9ed07d2017-03-07 12:28:11 -050010403 197,244,89,202, //vmulps %ymm2,%ymm1,%ymm1
10404 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
10405 72,173, //lods %ds:(%rsi),%rax
10406 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
10407 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
10408 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
10409 255,224, //jmpq *%rax
10410};
10411
Mike Klein894d5612017-03-07 07:59:52 -050010412CODE const uint8_t sk_matrix_2x3_avx[] = {
10413 72,173, //lods %ds:(%rsi),%rax
10414 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10415 196,98,125,24,72,8, //vbroadcastss 0x8(%rax),%ymm9
10416 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
10417 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
10418 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10419 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
10420 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
10421 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
10422 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
10423 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
10424 197,172,89,201, //vmulps %ymm1,%ymm10,%ymm1
10425 196,193,116,88,203, //vaddps %ymm11,%ymm1,%ymm1
10426 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
10427 197,252,88,201, //vaddps %ymm1,%ymm0,%ymm1
10428 72,173, //lods %ds:(%rsi),%rax
10429 197,124,41,192, //vmovaps %ymm8,%ymm0
10430 255,224, //jmpq *%rax
10431};
10432
10433CODE const uint8_t sk_matrix_3x4_avx[] = {
10434 72,173, //lods %ds:(%rsi),%rax
10435 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10436 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
10437 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
10438 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
10439 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
10440 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10441 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
10442 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10443 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
10444 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
10445 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
10446 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
10447 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
10448 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
10449 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
10450 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10451 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
10452 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10453 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
10454 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10455 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
10456 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
10457 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
10458 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
10459 197,156,89,210, //vmulps %ymm2,%ymm12,%ymm2
10460 196,193,108,88,213, //vaddps %ymm13,%ymm2,%ymm2
10461 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
10462 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
10463 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
10464 197,252,88,209, //vaddps %ymm1,%ymm0,%ymm2
10465 72,173, //lods %ds:(%rsi),%rax
10466 197,124,41,192, //vmovaps %ymm8,%ymm0
10467 197,124,41,201, //vmovaps %ymm9,%ymm1
10468 255,224, //jmpq *%rax
10469};
10470
Mike Kleine9ed07d2017-03-07 12:28:11 -050010471CODE const uint8_t sk_matrix_4x5_avx[] = {
10472 72,173, //lods %ds:(%rsi),%rax
10473 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10474 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
10475 196,98,125,24,80,32, //vbroadcastss 0x20(%rax),%ymm10
10476 196,98,125,24,88,48, //vbroadcastss 0x30(%rax),%ymm11
10477 196,98,125,24,96,64, //vbroadcastss 0x40(%rax),%ymm12
10478 197,36,89,219, //vmulps %ymm3,%ymm11,%ymm11
10479 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10480 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
10481 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10482 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
10483 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10484 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
10485 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
10486 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
10487 196,98,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm10
10488 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
10489 196,98,125,24,96,52, //vbroadcastss 0x34(%rax),%ymm12
10490 196,98,125,24,104,68, //vbroadcastss 0x44(%rax),%ymm13
10491 197,28,89,227, //vmulps %ymm3,%ymm12,%ymm12
10492 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
10493 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
10494 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10495 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
10496 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10497 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
10498 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10499 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
10500 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
10501 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
10502 196,98,125,24,104,56, //vbroadcastss 0x38(%rax),%ymm13
10503 196,98,125,24,112,72, //vbroadcastss 0x48(%rax),%ymm14
10504 197,20,89,235, //vmulps %ymm3,%ymm13,%ymm13
10505 196,65,20,88,238, //vaddps %ymm14,%ymm13,%ymm13
10506 197,28,89,226, //vmulps %ymm2,%ymm12,%ymm12
10507 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
10508 197,36,89,217, //vmulps %ymm1,%ymm11,%ymm11
10509 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10510 197,44,89,208, //vmulps %ymm0,%ymm10,%ymm10
10511 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10512 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
10513 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
10514 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
10515 196,98,125,24,112,60, //vbroadcastss 0x3c(%rax),%ymm14
10516 196,98,125,24,120,76, //vbroadcastss 0x4c(%rax),%ymm15
10517 197,140,89,219, //vmulps %ymm3,%ymm14,%ymm3
10518 196,193,100,88,223, //vaddps %ymm15,%ymm3,%ymm3
10519 197,148,89,210, //vmulps %ymm2,%ymm13,%ymm2
10520 197,236,88,211, //vaddps %ymm3,%ymm2,%ymm2
10521 197,156,89,201, //vmulps %ymm1,%ymm12,%ymm1
10522 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
10523 197,164,89,192, //vmulps %ymm0,%ymm11,%ymm0
10524 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
10525 72,173, //lods %ds:(%rsi),%rax
10526 197,124,41,192, //vmovaps %ymm8,%ymm0
10527 197,124,41,201, //vmovaps %ymm9,%ymm1
10528 197,124,41,210, //vmovaps %ymm10,%ymm2
10529 255,224, //jmpq *%rax
10530};
10531
Mike Klein894d5612017-03-07 07:59:52 -050010532CODE const uint8_t sk_matrix_perspective_avx[] = {
10533 72,173, //lods %ds:(%rsi),%rax
10534 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10535 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
10536 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
10537 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
10538 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10539 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
10540 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
10541 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
10542 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
10543 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
10544 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
10545 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10546 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
10547 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10548 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
10549 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
10550 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
10551 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
10552 196,193,116,88,204, //vaddps %ymm12,%ymm1,%ymm1
10553 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
10554 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
10555 197,252,83,200, //vrcpps %ymm0,%ymm1
10556 197,188,89,193, //vmulps %ymm1,%ymm8,%ymm0
10557 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
10558 72,173, //lods %ds:(%rsi),%rax
10559 255,224, //jmpq *%rax
10560};
10561
10562CODE const uint8_t sk_linear_gradient_2stops_avx[] = {
10563 72,173, //lods %ds:(%rsi),%rax
10564 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
10565 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
10566 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
10567 197,108,88,193, //vaddps %ymm1,%ymm2,%ymm8
10568 196,226,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm1
10569 196,226,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm2
10570 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
10571 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
10572 196,226,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm2
10573 196,226,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm3
10574 197,236,89,208, //vmulps %ymm0,%ymm2,%ymm2
10575 197,228,88,210, //vaddps %ymm2,%ymm3,%ymm2
10576 196,226,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm3
10577 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
10578 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
10579 197,180,88,216, //vaddps %ymm0,%ymm9,%ymm3
10580 72,173, //lods %ds:(%rsi),%rax
10581 197,124,41,192, //vmovaps %ymm8,%ymm0
10582 255,224, //jmpq *%rax
10583};
10584
10585CODE const uint8_t sk_start_pipeline_sse41[] = {
10586 65,87, //push %r15
10587 65,86, //push %r14
10588 65,85, //push %r13
10589 65,84, //push %r12
10590 86, //push %rsi
10591 87, //push %rdi
10592 83, //push %rbx
10593 72,129,236,160,0,0,0, //sub $0xa0,%rsp
10594 68,15,41,188,36,144,0,0,0, //movaps %xmm15,0x90(%rsp)
10595 68,15,41,180,36,128,0,0,0, //movaps %xmm14,0x80(%rsp)
10596 68,15,41,108,36,112, //movaps %xmm13,0x70(%rsp)
10597 68,15,41,100,36,96, //movaps %xmm12,0x60(%rsp)
10598 68,15,41,92,36,80, //movaps %xmm11,0x50(%rsp)
10599 68,15,41,84,36,64, //movaps %xmm10,0x40(%rsp)
10600 68,15,41,76,36,48, //movaps %xmm9,0x30(%rsp)
10601 68,15,41,68,36,32, //movaps %xmm8,0x20(%rsp)
10602 15,41,124,36,16, //movaps %xmm7,0x10(%rsp)
10603 15,41,52,36, //movaps %xmm6,(%rsp)
10604 77,137,207, //mov %r9,%r15
10605 77,137,198, //mov %r8,%r14
10606 72,137,203, //mov %rcx,%rbx
10607 72,137,214, //mov %rdx,%rsi
10608 72,173, //lods %ds:(%rsi),%rax
10609 73,137,196, //mov %rax,%r12
10610 73,137,245, //mov %rsi,%r13
10611 72,141,67,4, //lea 0x4(%rbx),%rax
10612 76,57,248, //cmp %r15,%rax
10613 118,5, //jbe 73 <_sk_start_pipeline_sse41+0x73>
10614 72,137,216, //mov %rbx,%rax
10615 235,52, //jmp a7 <_sk_start_pipeline_sse41+0xa7>
10616 15,87,192, //xorps %xmm0,%xmm0
10617 15,87,201, //xorps %xmm1,%xmm1
10618 15,87,210, //xorps %xmm2,%xmm2
10619 15,87,219, //xorps %xmm3,%xmm3
10620 15,87,228, //xorps %xmm4,%xmm4
10621 15,87,237, //xorps %xmm5,%xmm5
10622 15,87,246, //xorps %xmm6,%xmm6
10623 15,87,255, //xorps %xmm7,%xmm7
10624 72,137,223, //mov %rbx,%rdi
10625 76,137,238, //mov %r13,%rsi
10626 76,137,242, //mov %r14,%rdx
10627 65,255,212, //callq *%r12
10628 72,141,67,4, //lea 0x4(%rbx),%rax
10629 72,131,195,8, //add $0x8,%rbx
10630 76,57,251, //cmp %r15,%rbx
10631 72,137,195, //mov %rax,%rbx
10632 118,204, //jbe 73 <_sk_start_pipeline_sse41+0x73>
10633 15,40,52,36, //movaps (%rsp),%xmm6
10634 15,40,124,36,16, //movaps 0x10(%rsp),%xmm7
10635 68,15,40,68,36,32, //movaps 0x20(%rsp),%xmm8
10636 68,15,40,76,36,48, //movaps 0x30(%rsp),%xmm9
10637 68,15,40,84,36,64, //movaps 0x40(%rsp),%xmm10
10638 68,15,40,92,36,80, //movaps 0x50(%rsp),%xmm11
10639 68,15,40,100,36,96, //movaps 0x60(%rsp),%xmm12
10640 68,15,40,108,36,112, //movaps 0x70(%rsp),%xmm13
10641 68,15,40,180,36,128,0,0,0, //movaps 0x80(%rsp),%xmm14
10642 68,15,40,188,36,144,0,0,0, //movaps 0x90(%rsp),%xmm15
10643 72,129,196,160,0,0,0, //add $0xa0,%rsp
10644 91, //pop %rbx
10645 95, //pop %rdi
10646 94, //pop %rsi
10647 65,92, //pop %r12
10648 65,93, //pop %r13
10649 65,94, //pop %r14
10650 65,95, //pop %r15
10651 195, //retq
10652};
10653
10654CODE const uint8_t sk_just_return_sse41[] = {
10655 195, //retq
10656};
10657
10658CODE const uint8_t sk_seed_shader_sse41[] = {
10659 72,173, //lods %ds:(%rsi),%rax
10660 102,15,110,199, //movd %edi,%xmm0
10661 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
10662 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010663 185,0,0,0,63, //mov $0x3f000000,%ecx
10664 102,15,110,209, //movd %ecx,%xmm2
10665 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10666 15,88,202, //addps %xmm2,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050010667 15,16,2, //movups (%rdx),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050010668 15,88,193, //addps %xmm1,%xmm0
10669 102,15,110,8, //movd (%rax),%xmm1
10670 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
10671 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010672 15,88,202, //addps %xmm2,%xmm1
10673 184,0,0,128,63, //mov $0x3f800000,%eax
10674 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010675 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10676 72,173, //lods %ds:(%rsi),%rax
10677 15,87,219, //xorps %xmm3,%xmm3
10678 15,87,228, //xorps %xmm4,%xmm4
10679 15,87,237, //xorps %xmm5,%xmm5
10680 15,87,246, //xorps %xmm6,%xmm6
10681 15,87,255, //xorps %xmm7,%xmm7
10682 255,224, //jmpq *%rax
10683};
10684
10685CODE const uint8_t sk_constant_color_sse41[] = {
10686 72,173, //lods %ds:(%rsi),%rax
10687 15,16,24, //movups (%rax),%xmm3
10688 15,40,195, //movaps %xmm3,%xmm0
10689 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
10690 15,40,203, //movaps %xmm3,%xmm1
10691 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
10692 15,40,211, //movaps %xmm3,%xmm2
10693 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
10694 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
10695 72,173, //lods %ds:(%rsi),%rax
10696 255,224, //jmpq *%rax
10697};
10698
10699CODE const uint8_t sk_clear_sse41[] = {
10700 72,173, //lods %ds:(%rsi),%rax
10701 15,87,192, //xorps %xmm0,%xmm0
10702 15,87,201, //xorps %xmm1,%xmm1
10703 15,87,210, //xorps %xmm2,%xmm2
10704 15,87,219, //xorps %xmm3,%xmm3
10705 255,224, //jmpq *%rax
10706};
10707
10708CODE const uint8_t sk_plus__sse41[] = {
10709 15,88,196, //addps %xmm4,%xmm0
10710 15,88,205, //addps %xmm5,%xmm1
10711 15,88,214, //addps %xmm6,%xmm2
10712 15,88,223, //addps %xmm7,%xmm3
10713 72,173, //lods %ds:(%rsi),%rax
10714 255,224, //jmpq *%rax
10715};
10716
10717CODE const uint8_t sk_srcover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010718 184,0,0,128,63, //mov $0x3f800000,%eax
10719 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010720 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10721 68,15,92,195, //subps %xmm3,%xmm8
10722 69,15,40,200, //movaps %xmm8,%xmm9
10723 68,15,89,204, //mulps %xmm4,%xmm9
10724 65,15,88,193, //addps %xmm9,%xmm0
10725 69,15,40,200, //movaps %xmm8,%xmm9
10726 68,15,89,205, //mulps %xmm5,%xmm9
10727 65,15,88,201, //addps %xmm9,%xmm1
10728 69,15,40,200, //movaps %xmm8,%xmm9
10729 68,15,89,206, //mulps %xmm6,%xmm9
10730 65,15,88,209, //addps %xmm9,%xmm2
10731 68,15,89,199, //mulps %xmm7,%xmm8
10732 65,15,88,216, //addps %xmm8,%xmm3
10733 72,173, //lods %ds:(%rsi),%rax
10734 255,224, //jmpq *%rax
10735};
10736
10737CODE const uint8_t sk_dstover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010738 184,0,0,128,63, //mov $0x3f800000,%eax
10739 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010740 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10741 68,15,92,199, //subps %xmm7,%xmm8
10742 65,15,89,192, //mulps %xmm8,%xmm0
10743 15,88,196, //addps %xmm4,%xmm0
10744 65,15,89,200, //mulps %xmm8,%xmm1
10745 15,88,205, //addps %xmm5,%xmm1
10746 65,15,89,208, //mulps %xmm8,%xmm2
10747 15,88,214, //addps %xmm6,%xmm2
10748 65,15,89,216, //mulps %xmm8,%xmm3
10749 15,88,223, //addps %xmm7,%xmm3
10750 72,173, //lods %ds:(%rsi),%rax
10751 255,224, //jmpq *%rax
10752};
10753
10754CODE const uint8_t sk_clamp_0_sse41[] = {
10755 69,15,87,192, //xorps %xmm8,%xmm8
10756 65,15,95,192, //maxps %xmm8,%xmm0
10757 65,15,95,200, //maxps %xmm8,%xmm1
10758 65,15,95,208, //maxps %xmm8,%xmm2
10759 65,15,95,216, //maxps %xmm8,%xmm3
10760 72,173, //lods %ds:(%rsi),%rax
10761 255,224, //jmpq *%rax
10762};
10763
10764CODE const uint8_t sk_clamp_1_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010765 184,0,0,128,63, //mov $0x3f800000,%eax
10766 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010767 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10768 65,15,93,192, //minps %xmm8,%xmm0
10769 65,15,93,200, //minps %xmm8,%xmm1
10770 65,15,93,208, //minps %xmm8,%xmm2
10771 65,15,93,216, //minps %xmm8,%xmm3
10772 72,173, //lods %ds:(%rsi),%rax
10773 255,224, //jmpq *%rax
10774};
10775
10776CODE const uint8_t sk_clamp_a_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010777 184,0,0,128,63, //mov $0x3f800000,%eax
10778 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010779 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10780 65,15,93,216, //minps %xmm8,%xmm3
10781 15,93,195, //minps %xmm3,%xmm0
10782 15,93,203, //minps %xmm3,%xmm1
10783 15,93,211, //minps %xmm3,%xmm2
10784 72,173, //lods %ds:(%rsi),%rax
10785 255,224, //jmpq *%rax
10786};
10787
10788CODE const uint8_t sk_set_rgb_sse41[] = {
10789 72,173, //lods %ds:(%rsi),%rax
10790 243,15,16,0, //movss (%rax),%xmm0
10791 243,15,16,72,4, //movss 0x4(%rax),%xmm1
10792 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
10793 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
10794 243,15,16,80,8, //movss 0x8(%rax),%xmm2
10795 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10796 72,173, //lods %ds:(%rsi),%rax
10797 255,224, //jmpq *%rax
10798};
10799
10800CODE const uint8_t sk_swap_rb_sse41[] = {
10801 68,15,40,192, //movaps %xmm0,%xmm8
10802 72,173, //lods %ds:(%rsi),%rax
10803 15,40,194, //movaps %xmm2,%xmm0
10804 65,15,40,208, //movaps %xmm8,%xmm2
10805 255,224, //jmpq *%rax
10806};
10807
10808CODE const uint8_t sk_swap_sse41[] = {
10809 68,15,40,195, //movaps %xmm3,%xmm8
10810 68,15,40,202, //movaps %xmm2,%xmm9
10811 68,15,40,209, //movaps %xmm1,%xmm10
10812 68,15,40,216, //movaps %xmm0,%xmm11
10813 72,173, //lods %ds:(%rsi),%rax
10814 15,40,196, //movaps %xmm4,%xmm0
10815 15,40,205, //movaps %xmm5,%xmm1
10816 15,40,214, //movaps %xmm6,%xmm2
10817 15,40,223, //movaps %xmm7,%xmm3
10818 65,15,40,227, //movaps %xmm11,%xmm4
10819 65,15,40,234, //movaps %xmm10,%xmm5
10820 65,15,40,241, //movaps %xmm9,%xmm6
10821 65,15,40,248, //movaps %xmm8,%xmm7
10822 255,224, //jmpq *%rax
10823};
10824
10825CODE const uint8_t sk_move_src_dst_sse41[] = {
10826 72,173, //lods %ds:(%rsi),%rax
10827 15,40,224, //movaps %xmm0,%xmm4
10828 15,40,233, //movaps %xmm1,%xmm5
10829 15,40,242, //movaps %xmm2,%xmm6
10830 15,40,251, //movaps %xmm3,%xmm7
10831 255,224, //jmpq *%rax
10832};
10833
10834CODE const uint8_t sk_move_dst_src_sse41[] = {
10835 72,173, //lods %ds:(%rsi),%rax
10836 15,40,196, //movaps %xmm4,%xmm0
10837 15,40,205, //movaps %xmm5,%xmm1
10838 15,40,214, //movaps %xmm6,%xmm2
10839 15,40,223, //movaps %xmm7,%xmm3
10840 255,224, //jmpq *%rax
10841};
10842
10843CODE const uint8_t sk_premul_sse41[] = {
10844 15,89,195, //mulps %xmm3,%xmm0
10845 15,89,203, //mulps %xmm3,%xmm1
10846 15,89,211, //mulps %xmm3,%xmm2
10847 72,173, //lods %ds:(%rsi),%rax
10848 255,224, //jmpq *%rax
10849};
10850
10851CODE const uint8_t sk_unpremul_sse41[] = {
Mike Klein64b97482017-03-14 17:35:04 -070010852 69,15,87,192, //xorps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010853 184,0,0,128,63, //mov $0x3f800000,%eax
Mike Klein64b97482017-03-14 17:35:04 -070010854 102,68,15,110,200, //movd %eax,%xmm9
10855 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
10856 68,15,94,203, //divps %xmm3,%xmm9
10857 68,15,194,195,4, //cmpneqps %xmm3,%xmm8
10858 69,15,84,193, //andps %xmm9,%xmm8
10859 65,15,89,192, //mulps %xmm8,%xmm0
10860 65,15,89,200, //mulps %xmm8,%xmm1
10861 65,15,89,208, //mulps %xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010862 72,173, //lods %ds:(%rsi),%rax
Mike Klein894d5612017-03-07 07:59:52 -050010863 255,224, //jmpq *%rax
10864};
10865
10866CODE const uint8_t sk_from_srgb_sse41[] = {
Mike Klein5224f462017-03-07 17:29:54 -050010867 184,145,131,158,61, //mov $0x3d9e8391,%eax
10868 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050010869 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
10870 69,15,40,211, //movaps %xmm11,%xmm10
10871 68,15,89,208, //mulps %xmm0,%xmm10
10872 68,15,40,240, //movaps %xmm0,%xmm14
10873 69,15,89,246, //mulps %xmm14,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -050010874 184,154,153,153,62, //mov $0x3e99999a,%eax
10875 102,68,15,110,192, //movd %eax,%xmm8
10876 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10877 184,92,143,50,63, //mov $0x3f328f5c,%eax
10878 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050010879 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -050010880 69,15,40,200, //movaps %xmm8,%xmm9
10881 68,15,89,200, //mulps %xmm0,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050010882 69,15,88,204, //addps %xmm12,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050010883 184,10,215,35,59, //mov $0x3b23d70a,%eax
10884 102,68,15,110,232, //movd %eax,%xmm13
10885 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
10886 69,15,89,206, //mulps %xmm14,%xmm9
10887 69,15,88,205, //addps %xmm13,%xmm9
10888 184,174,71,97,61, //mov $0x3d6147ae,%eax
10889 102,68,15,110,240, //movd %eax,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -050010890 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
10891 65,15,194,198,1, //cmpltps %xmm14,%xmm0
10892 102,69,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm9
10893 69,15,40,251, //movaps %xmm11,%xmm15
10894 68,15,89,249, //mulps %xmm1,%xmm15
10895 15,40,193, //movaps %xmm1,%xmm0
10896 15,89,192, //mulps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010897 69,15,40,208, //movaps %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050010898 68,15,89,209, //mulps %xmm1,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050010899 69,15,88,212, //addps %xmm12,%xmm10
Mike Klein5224f462017-03-07 17:29:54 -050010900 68,15,89,208, //mulps %xmm0,%xmm10
10901 69,15,88,213, //addps %xmm13,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050010902 65,15,194,206,1, //cmpltps %xmm14,%xmm1
10903 15,40,193, //movaps %xmm1,%xmm0
10904 102,69,15,56,20,215, //blendvps %xmm0,%xmm15,%xmm10
Mike Klein5224f462017-03-07 17:29:54 -050010905 68,15,89,218, //mulps %xmm2,%xmm11
10906 15,40,194, //movaps %xmm2,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050010907 15,89,192, //mulps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010908 68,15,89,194, //mulps %xmm2,%xmm8
10909 69,15,88,196, //addps %xmm12,%xmm8
10910 68,15,89,192, //mulps %xmm0,%xmm8
10911 69,15,88,197, //addps %xmm13,%xmm8
10912 65,15,194,214,1, //cmpltps %xmm14,%xmm2
10913 15,40,194, //movaps %xmm2,%xmm0
10914 102,69,15,56,20,195, //blendvps %xmm0,%xmm11,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010915 72,173, //lods %ds:(%rsi),%rax
10916 65,15,40,193, //movaps %xmm9,%xmm0
10917 65,15,40,202, //movaps %xmm10,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050010918 65,15,40,208, //movaps %xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010919 255,224, //jmpq *%rax
10920};
10921
10922CODE const uint8_t sk_to_srgb_sse41[] = {
10923 72,131,236,24, //sub $0x18,%rsp
10924 15,41,60,36, //movaps %xmm7,(%rsp)
10925 15,40,254, //movaps %xmm6,%xmm7
10926 15,40,245, //movaps %xmm5,%xmm6
10927 15,40,236, //movaps %xmm4,%xmm5
10928 15,40,227, //movaps %xmm3,%xmm4
Mike Klein5224f462017-03-07 17:29:54 -050010929 15,40,218, //movaps %xmm2,%xmm3
10930 15,40,209, //movaps %xmm1,%xmm2
10931 68,15,82,192, //rsqrtps %xmm0,%xmm8
10932 69,15,83,200, //rcpps %xmm8,%xmm9
10933 69,15,82,248, //rsqrtps %xmm8,%xmm15
10934 184,41,92,71,65, //mov $0x41475c29,%eax
10935 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050010936 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
Mike Klein5224f462017-03-07 17:29:54 -050010937 69,15,40,211, //movaps %xmm11,%xmm10
10938 68,15,89,208, //mulps %xmm0,%xmm10
10939 184,0,0,128,63, //mov $0x3f800000,%eax
10940 102,68,15,110,192, //movd %eax,%xmm8
10941 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10942 184,194,135,210,62, //mov $0x3ed287c2,%eax
10943 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050010944 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -050010945 184,206,111,48,63, //mov $0x3f306fce,%eax
10946 102,68,15,110,232, //movd %eax,%xmm13
Mike Klein894d5612017-03-07 07:59:52 -050010947 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
Mike Klein5224f462017-03-07 17:29:54 -050010948 184,168,87,202,61, //mov $0x3dca57a8,%eax
10949 53,0,0,0,128, //xor $0x80000000,%eax
10950 102,68,15,110,240, //movd %eax,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -050010951 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
10952 69,15,89,205, //mulps %xmm13,%xmm9
10953 69,15,88,206, //addps %xmm14,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050010954 69,15,89,252, //mulps %xmm12,%xmm15
10955 69,15,88,249, //addps %xmm9,%xmm15
10956 69,15,40,200, //movaps %xmm8,%xmm9
10957 69,15,93,207, //minps %xmm15,%xmm9
10958 184,4,231,140,59, //mov $0x3b8ce704,%eax
10959 102,68,15,110,248, //movd %eax,%xmm15
Mike Klein894d5612017-03-07 07:59:52 -050010960 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
10961 65,15,194,199,1, //cmpltps %xmm15,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010962 102,69,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm9
10963 68,15,82,210, //rsqrtps %xmm2,%xmm10
10964 65,15,83,194, //rcpps %xmm10,%xmm0
10965 69,15,82,210, //rsqrtps %xmm10,%xmm10
10966 65,15,89,197, //mulps %xmm13,%xmm0
10967 65,15,88,198, //addps %xmm14,%xmm0
10968 69,15,89,212, //mulps %xmm12,%xmm10
10969 68,15,88,208, //addps %xmm0,%xmm10
10970 65,15,40,200, //movaps %xmm8,%xmm1
10971 65,15,93,202, //minps %xmm10,%xmm1
10972 69,15,40,211, //movaps %xmm11,%xmm10
10973 68,15,89,210, //mulps %xmm2,%xmm10
10974 65,15,194,215,1, //cmpltps %xmm15,%xmm2
10975 15,40,194, //movaps %xmm2,%xmm0
10976 102,65,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050010977 15,82,195, //rsqrtps %xmm3,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010978 15,83,208, //rcpps %xmm0,%xmm2
10979 65,15,89,213, //mulps %xmm13,%xmm2
10980 65,15,88,214, //addps %xmm14,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010981 15,82,192, //rsqrtps %xmm0,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050010982 65,15,89,196, //mulps %xmm12,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010983 15,88,194, //addps %xmm2,%xmm0
10984 68,15,93,192, //minps %xmm0,%xmm8
10985 68,15,89,219, //mulps %xmm3,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050010986 65,15,194,223,1, //cmpltps %xmm15,%xmm3
10987 15,40,195, //movaps %xmm3,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010988 102,69,15,56,20,195, //blendvps %xmm0,%xmm11,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010989 72,173, //lods %ds:(%rsi),%rax
10990 65,15,40,193, //movaps %xmm9,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050010991 65,15,40,208, //movaps %xmm8,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010992 15,40,220, //movaps %xmm4,%xmm3
10993 15,40,229, //movaps %xmm5,%xmm4
10994 15,40,238, //movaps %xmm6,%xmm5
10995 15,40,247, //movaps %xmm7,%xmm6
10996 15,40,60,36, //movaps (%rsp),%xmm7
10997 72,131,196,24, //add $0x18,%rsp
10998 255,224, //jmpq *%rax
10999};
11000
11001CODE const uint8_t sk_scale_1_float_sse41[] = {
11002 72,173, //lods %ds:(%rsi),%rax
11003 243,68,15,16,0, //movss (%rax),%xmm8
11004 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11005 65,15,89,192, //mulps %xmm8,%xmm0
11006 65,15,89,200, //mulps %xmm8,%xmm1
11007 65,15,89,208, //mulps %xmm8,%xmm2
11008 65,15,89,216, //mulps %xmm8,%xmm3
11009 72,173, //lods %ds:(%rsi),%rax
11010 255,224, //jmpq *%rax
11011};
11012
11013CODE const uint8_t sk_scale_u8_sse41[] = {
11014 72,173, //lods %ds:(%rsi),%rax
11015 72,139,0, //mov (%rax),%rax
11016 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
11017 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011018 184,129,128,128,59, //mov $0x3b808081,%eax
11019 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011020 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11021 69,15,89,200, //mulps %xmm8,%xmm9
11022 65,15,89,193, //mulps %xmm9,%xmm0
11023 65,15,89,201, //mulps %xmm9,%xmm1
11024 65,15,89,209, //mulps %xmm9,%xmm2
11025 65,15,89,217, //mulps %xmm9,%xmm3
11026 72,173, //lods %ds:(%rsi),%rax
11027 255,224, //jmpq *%rax
11028};
11029
11030CODE const uint8_t sk_lerp_1_float_sse41[] = {
11031 72,173, //lods %ds:(%rsi),%rax
11032 243,68,15,16,0, //movss (%rax),%xmm8
11033 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11034 15,92,196, //subps %xmm4,%xmm0
11035 65,15,89,192, //mulps %xmm8,%xmm0
11036 15,88,196, //addps %xmm4,%xmm0
11037 15,92,205, //subps %xmm5,%xmm1
11038 65,15,89,200, //mulps %xmm8,%xmm1
11039 15,88,205, //addps %xmm5,%xmm1
11040 15,92,214, //subps %xmm6,%xmm2
11041 65,15,89,208, //mulps %xmm8,%xmm2
11042 15,88,214, //addps %xmm6,%xmm2
11043 15,92,223, //subps %xmm7,%xmm3
11044 65,15,89,216, //mulps %xmm8,%xmm3
11045 15,88,223, //addps %xmm7,%xmm3
11046 72,173, //lods %ds:(%rsi),%rax
11047 255,224, //jmpq *%rax
11048};
11049
11050CODE const uint8_t sk_lerp_u8_sse41[] = {
11051 72,173, //lods %ds:(%rsi),%rax
11052 72,139,0, //mov (%rax),%rax
11053 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
11054 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011055 184,129,128,128,59, //mov $0x3b808081,%eax
11056 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011057 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11058 69,15,89,200, //mulps %xmm8,%xmm9
11059 15,92,196, //subps %xmm4,%xmm0
11060 65,15,89,193, //mulps %xmm9,%xmm0
11061 15,88,196, //addps %xmm4,%xmm0
11062 15,92,205, //subps %xmm5,%xmm1
11063 65,15,89,201, //mulps %xmm9,%xmm1
11064 15,88,205, //addps %xmm5,%xmm1
11065 15,92,214, //subps %xmm6,%xmm2
11066 65,15,89,209, //mulps %xmm9,%xmm2
11067 15,88,214, //addps %xmm6,%xmm2
11068 15,92,223, //subps %xmm7,%xmm3
11069 65,15,89,217, //mulps %xmm9,%xmm3
11070 15,88,223, //addps %xmm7,%xmm3
11071 72,173, //lods %ds:(%rsi),%rax
11072 255,224, //jmpq *%rax
11073};
11074
11075CODE const uint8_t sk_lerp_565_sse41[] = {
11076 72,173, //lods %ds:(%rsi),%rax
11077 72,139,0, //mov (%rax),%rax
11078 102,68,15,56,51,4,120, //pmovzxwd (%rax,%rdi,2),%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050011079 184,0,248,0,0, //mov $0xf800,%eax
11080 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011081 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
11082 102,65,15,219,216, //pand %xmm8,%xmm3
11083 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050011084 184,8,33,132,55, //mov $0x37842108,%eax
11085 102,68,15,110,208, //movd %eax,%xmm10
11086 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11087 69,15,89,209, //mulps %xmm9,%xmm10
11088 184,224,7,0,0, //mov $0x7e0,%eax
11089 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011090 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
11091 102,65,15,219,216, //pand %xmm8,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050011092 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
11093 184,33,8,2,58, //mov $0x3a020821,%eax
11094 102,68,15,110,216, //movd %eax,%xmm11
11095 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11096 69,15,89,217, //mulps %xmm9,%xmm11
11097 184,31,0,0,0, //mov $0x1f,%eax
11098 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011099 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
11100 102,65,15,219,216, //pand %xmm8,%xmm3
11101 68,15,91,195, //cvtdq2ps %xmm3,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050011102 184,8,33,4,61, //mov $0x3d042108,%eax
11103 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011104 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11105 65,15,89,216, //mulps %xmm8,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011106 15,92,196, //subps %xmm4,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050011107 65,15,89,194, //mulps %xmm10,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011108 15,88,196, //addps %xmm4,%xmm0
11109 15,92,205, //subps %xmm5,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050011110 65,15,89,203, //mulps %xmm11,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050011111 15,88,205, //addps %xmm5,%xmm1
11112 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011113 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050011114 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011115 184,0,0,128,63, //mov $0x3f800000,%eax
11116 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011117 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11118 72,173, //lods %ds:(%rsi),%rax
11119 255,224, //jmpq *%rax
11120};
11121
11122CODE const uint8_t sk_load_tables_sse41[] = {
11123 72,173, //lods %ds:(%rsi),%rax
11124 72,139,8, //mov (%rax),%rcx
11125 76,139,64,8, //mov 0x8(%rax),%r8
11126 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050011127 185,255,0,0,0, //mov $0xff,%ecx
11128 102,15,110,193, //movd %ecx,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011129 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
11130 102,65,15,111,200, //movdqa %xmm8,%xmm1
11131 102,15,114,209,8, //psrld $0x8,%xmm1
11132 102,15,219,200, //pand %xmm0,%xmm1
11133 102,65,15,111,208, //movdqa %xmm8,%xmm2
11134 102,15,114,210,16, //psrld $0x10,%xmm2
11135 102,15,219,208, //pand %xmm0,%xmm2
11136 102,65,15,219,192, //pand %xmm8,%xmm0
11137 102,72,15,58,22,193,1, //pextrq $0x1,%xmm0,%rcx
11138 65,137,201, //mov %ecx,%r9d
11139 72,193,233,32, //shr $0x20,%rcx
11140 102,73,15,126,194, //movq %xmm0,%r10
11141 69,137,211, //mov %r10d,%r11d
11142 73,193,234,32, //shr $0x20,%r10
11143 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
11144 102,67,15,58,33,4,144,16, //insertps $0x10,(%r8,%r10,4),%xmm0
11145 102,67,15,58,33,4,136,32, //insertps $0x20,(%r8,%r9,4),%xmm0
11146 102,65,15,58,33,4,136,48, //insertps $0x30,(%r8,%rcx,4),%xmm0
Mike Klein64b97482017-03-14 17:35:04 -070011147 76,139,64,16, //mov 0x10(%rax),%r8
11148 102,73,15,58,22,202,1, //pextrq $0x1,%xmm1,%r10
11149 77,137,209, //mov %r10,%r9
Mike Klein894d5612017-03-07 07:59:52 -050011150 73,193,233,32, //shr $0x20,%r9
Mike Klein64b97482017-03-14 17:35:04 -070011151 102,72,15,126,201, //movq %xmm1,%rcx
11152 65,137,203, //mov %ecx,%r11d
11153 65,129,227,255,255,255,0, //and $0xffffff,%r11d
11154 72,193,233,30, //shr $0x1e,%rcx
11155 65,129,226,255,255,255,0, //and $0xffffff,%r10d
11156 243,67,15,16,12,152, //movss (%r8,%r11,4),%xmm1
11157 102,65,15,58,33,12,8,16, //insertps $0x10,(%r8,%rcx,1),%xmm1
11158 243,67,15,16,28,144, //movss (%r8,%r10,4),%xmm3
11159 102,15,58,33,203,32, //insertps $0x20,%xmm3,%xmm1
11160 243,67,15,16,28,136, //movss (%r8,%r9,4),%xmm3
11161 102,15,58,33,203,48, //insertps $0x30,%xmm3,%xmm1
11162 76,139,72,24, //mov 0x18(%rax),%r9
11163 102,72,15,58,22,209,1, //pextrq $0x1,%xmm2,%rcx
11164 68,15,183,193, //movzwl %cx,%r8d
11165 72,193,233,32, //shr $0x20,%rcx
11166 102,72,15,126,208, //movq %xmm2,%rax
11167 68,15,183,208, //movzwl %ax,%r10d
11168 72,193,232,30, //shr $0x1e,%rax
11169 243,67,15,16,20,145, //movss (%r9,%r10,4),%xmm2
11170 102,65,15,58,33,20,1,16, //insertps $0x10,(%r9,%rax,1),%xmm2
11171 243,67,15,16,28,129, //movss (%r9,%r8,4),%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011172 102,15,58,33,211,32, //insertps $0x20,%xmm3,%xmm2
Mike Klein64b97482017-03-14 17:35:04 -070011173 243,65,15,16,28,137, //movss (%r9,%rcx,4),%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011174 102,15,58,33,211,48, //insertps $0x30,%xmm3,%xmm2
11175 102,65,15,114,208,24, //psrld $0x18,%xmm8
11176 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050011177 184,129,128,128,59, //mov $0x3b808081,%eax
11178 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011179 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11180 65,15,89,216, //mulps %xmm8,%xmm3
11181 72,173, //lods %ds:(%rsi),%rax
11182 255,224, //jmpq *%rax
11183};
11184
11185CODE const uint8_t sk_load_a8_sse41[] = {
11186 72,173, //lods %ds:(%rsi),%rax
11187 72,139,0, //mov (%rax),%rax
11188 102,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm0
11189 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050011190 184,129,128,128,59, //mov $0x3b808081,%eax
11191 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011192 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11193 15,89,216, //mulps %xmm0,%xmm3
11194 72,173, //lods %ds:(%rsi),%rax
11195 15,87,192, //xorps %xmm0,%xmm0
11196 15,87,201, //xorps %xmm1,%xmm1
11197 15,87,210, //xorps %xmm2,%xmm2
11198 255,224, //jmpq *%rax
11199};
11200
11201CODE const uint8_t sk_store_a8_sse41[] = {
11202 72,173, //lods %ds:(%rsi),%rax
11203 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050011204 185,0,0,127,67, //mov $0x437f0000,%ecx
11205 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011206 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11207 68,15,89,195, //mulps %xmm3,%xmm8
11208 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
11209 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
11210 102,69,15,103,192, //packuswb %xmm8,%xmm8
11211 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
11212 72,173, //lods %ds:(%rsi),%rax
11213 255,224, //jmpq *%rax
11214};
11215
11216CODE const uint8_t sk_load_565_sse41[] = {
11217 72,173, //lods %ds:(%rsi),%rax
11218 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050011219 102,15,56,51,20,120, //pmovzxwd (%rax,%rdi,2),%xmm2
11220 184,0,248,0,0, //mov $0xf800,%eax
11221 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011222 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050011223 102,15,219,194, //pand %xmm2,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011224 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050011225 184,8,33,132,55, //mov $0x37842108,%eax
11226 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011227 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11228 15,89,193, //mulps %xmm1,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050011229 184,224,7,0,0, //mov $0x7e0,%eax
11230 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050011231 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050011232 102,15,219,202, //pand %xmm2,%xmm1
11233 15,91,217, //cvtdq2ps %xmm1,%xmm3
11234 184,33,8,2,58, //mov $0x3a020821,%eax
11235 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050011236 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050011237 15,89,203, //mulps %xmm3,%xmm1
11238 184,31,0,0,0, //mov $0x1f,%eax
11239 102,15,110,216, //movd %eax,%xmm3
11240 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
11241 102,15,219,218, //pand %xmm2,%xmm3
11242 15,91,219, //cvtdq2ps %xmm3,%xmm3
11243 184,8,33,4,61, //mov $0x3d042108,%eax
11244 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050011245 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -050011246 15,89,211, //mulps %xmm3,%xmm2
11247 184,0,0,128,63, //mov $0x3f800000,%eax
11248 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011249 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11250 72,173, //lods %ds:(%rsi),%rax
11251 255,224, //jmpq *%rax
11252};
11253
11254CODE const uint8_t sk_store_565_sse41[] = {
11255 72,173, //lods %ds:(%rsi),%rax
11256 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050011257 185,0,0,248,65, //mov $0x41f80000,%ecx
11258 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011259 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050011260 69,15,40,200, //movaps %xmm8,%xmm9
11261 68,15,89,200, //mulps %xmm0,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011262 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050011263 102,65,15,114,241,11, //pslld $0xb,%xmm9
11264 185,0,0,124,66, //mov $0x427c0000,%ecx
11265 102,68,15,110,209, //movd %ecx,%xmm10
11266 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11267 68,15,89,209, //mulps %xmm1,%xmm10
11268 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
11269 102,65,15,114,242,5, //pslld $0x5,%xmm10
11270 102,69,15,235,209, //por %xmm9,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050011271 68,15,89,194, //mulps %xmm2,%xmm8
11272 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050011273 102,69,15,86,194, //orpd %xmm10,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011274 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
11275 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
11276 72,173, //lods %ds:(%rsi),%rax
11277 255,224, //jmpq *%rax
11278};
11279
11280CODE const uint8_t sk_load_8888_sse41[] = {
11281 72,173, //lods %ds:(%rsi),%rax
11282 72,139,0, //mov (%rax),%rax
11283 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011284 184,255,0,0,0, //mov $0xff,%eax
11285 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011286 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
11287 102,15,111,203, //movdqa %xmm3,%xmm1
11288 102,15,114,209,8, //psrld $0x8,%xmm1
11289 102,15,219,200, //pand %xmm0,%xmm1
11290 102,15,111,211, //movdqa %xmm3,%xmm2
11291 102,15,114,210,16, //psrld $0x10,%xmm2
11292 102,15,219,208, //pand %xmm0,%xmm2
11293 102,15,219,195, //pand %xmm3,%xmm0
11294 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011295 184,129,128,128,59, //mov $0x3b808081,%eax
11296 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011297 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11298 65,15,89,192, //mulps %xmm8,%xmm0
11299 15,91,201, //cvtdq2ps %xmm1,%xmm1
11300 65,15,89,200, //mulps %xmm8,%xmm1
11301 15,91,210, //cvtdq2ps %xmm2,%xmm2
11302 65,15,89,208, //mulps %xmm8,%xmm2
11303 102,15,114,211,24, //psrld $0x18,%xmm3
11304 15,91,219, //cvtdq2ps %xmm3,%xmm3
11305 65,15,89,216, //mulps %xmm8,%xmm3
11306 72,173, //lods %ds:(%rsi),%rax
11307 255,224, //jmpq *%rax
11308};
11309
11310CODE const uint8_t sk_store_8888_sse41[] = {
11311 72,173, //lods %ds:(%rsi),%rax
11312 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011313 185,0,0,127,67, //mov $0x437f0000,%ecx
11314 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011315 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11316 69,15,40,200, //movaps %xmm8,%xmm9
11317 68,15,89,200, //mulps %xmm0,%xmm9
11318 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
11319 69,15,40,208, //movaps %xmm8,%xmm10
11320 68,15,89,209, //mulps %xmm1,%xmm10
11321 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
11322 102,65,15,114,242,8, //pslld $0x8,%xmm10
11323 102,69,15,235,209, //por %xmm9,%xmm10
11324 69,15,40,200, //movaps %xmm8,%xmm9
11325 68,15,89,202, //mulps %xmm2,%xmm9
11326 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
11327 102,65,15,114,241,16, //pslld $0x10,%xmm9
11328 68,15,89,195, //mulps %xmm3,%xmm8
11329 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
11330 102,65,15,114,240,24, //pslld $0x18,%xmm8
11331 102,69,15,235,193, //por %xmm9,%xmm8
11332 102,69,15,235,194, //por %xmm10,%xmm8
11333 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
11334 72,173, //lods %ds:(%rsi),%rax
11335 255,224, //jmpq *%rax
11336};
11337
11338CODE const uint8_t sk_load_f16_sse41[] = {
11339 72,173, //lods %ds:(%rsi),%rax
11340 72,139,0, //mov (%rax),%rax
11341 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
11342 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
11343 102,15,111,208, //movdqa %xmm0,%xmm2
11344 102,15,97,209, //punpcklwd %xmm1,%xmm2
11345 102,15,105,193, //punpckhwd %xmm1,%xmm0
11346 102,68,15,111,194, //movdqa %xmm2,%xmm8
11347 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
11348 102,15,105,208, //punpckhwd %xmm0,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -050011349 184,0,4,0,4, //mov $0x4000400,%eax
11350 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011351 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
11352 102,15,111,203, //movdqa %xmm3,%xmm1
11353 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
11354 102,65,15,223,200, //pandn %xmm8,%xmm1
11355 102,15,101,218, //pcmpgtw %xmm2,%xmm3
11356 102,15,223,218, //pandn %xmm2,%xmm3
11357 102,15,56,51,193, //pmovzxwd %xmm1,%xmm0
11358 102,15,114,240,13, //pslld $0xd,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050011359 184,0,0,128,119, //mov $0x77800000,%eax
11360 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050011361 102,68,15,112,194,0, //pshufd $0x0,%xmm2,%xmm8
11362 65,15,89,192, //mulps %xmm8,%xmm0
11363 102,69,15,239,201, //pxor %xmm9,%xmm9
11364 102,65,15,105,201, //punpckhwd %xmm9,%xmm1
11365 102,15,114,241,13, //pslld $0xd,%xmm1
11366 65,15,89,200, //mulps %xmm8,%xmm1
11367 102,15,56,51,211, //pmovzxwd %xmm3,%xmm2
11368 102,15,114,242,13, //pslld $0xd,%xmm2
11369 65,15,89,208, //mulps %xmm8,%xmm2
11370 102,65,15,105,217, //punpckhwd %xmm9,%xmm3
11371 102,15,114,243,13, //pslld $0xd,%xmm3
11372 65,15,89,216, //mulps %xmm8,%xmm3
11373 72,173, //lods %ds:(%rsi),%rax
11374 255,224, //jmpq *%rax
11375};
11376
11377CODE const uint8_t sk_store_f16_sse41[] = {
11378 72,173, //lods %ds:(%rsi),%rax
11379 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050011380 185,0,0,128,7, //mov $0x7800000,%ecx
11381 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011382 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
11383 102,69,15,111,200, //movdqa %xmm8,%xmm9
11384 68,15,89,200, //mulps %xmm0,%xmm9
11385 102,65,15,114,209,13, //psrld $0xd,%xmm9
11386 102,69,15,111,208, //movdqa %xmm8,%xmm10
11387 68,15,89,209, //mulps %xmm1,%xmm10
11388 102,65,15,114,210,13, //psrld $0xd,%xmm10
11389 102,69,15,111,216, //movdqa %xmm8,%xmm11
11390 68,15,89,218, //mulps %xmm2,%xmm11
11391 102,65,15,114,211,13, //psrld $0xd,%xmm11
11392 68,15,89,195, //mulps %xmm3,%xmm8
11393 102,65,15,114,208,13, //psrld $0xd,%xmm8
11394 102,65,15,115,250,2, //pslldq $0x2,%xmm10
11395 102,69,15,235,209, //por %xmm9,%xmm10
11396 102,65,15,115,248,2, //pslldq $0x2,%xmm8
11397 102,69,15,235,195, //por %xmm11,%xmm8
11398 102,69,15,111,202, //movdqa %xmm10,%xmm9
11399 102,69,15,98,200, //punpckldq %xmm8,%xmm9
11400 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
11401 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
11402 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
11403 72,173, //lods %ds:(%rsi),%rax
11404 255,224, //jmpq *%rax
11405};
11406
11407CODE const uint8_t sk_store_f32_sse41[] = {
11408 72,173, //lods %ds:(%rsi),%rax
11409 72,139,0, //mov (%rax),%rax
11410 72,137,249, //mov %rdi,%rcx
11411 72,193,225,4, //shl $0x4,%rcx
11412 68,15,40,192, //movaps %xmm0,%xmm8
11413 68,15,40,200, //movaps %xmm0,%xmm9
11414 68,15,20,201, //unpcklps %xmm1,%xmm9
11415 68,15,40,210, //movaps %xmm2,%xmm10
11416 68,15,40,218, //movaps %xmm2,%xmm11
11417 68,15,20,219, //unpcklps %xmm3,%xmm11
11418 68,15,21,193, //unpckhps %xmm1,%xmm8
11419 68,15,21,211, //unpckhps %xmm3,%xmm10
11420 69,15,40,225, //movaps %xmm9,%xmm12
11421 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -070011422 69,15,18,217, //movhlps %xmm9,%xmm11
11423 69,15,40,200, //movaps %xmm8,%xmm9
11424 102,69,15,20,202, //unpcklpd %xmm10,%xmm9
11425 69,15,18,208, //movhlps %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050011426 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
Mike Klein64b97482017-03-14 17:35:04 -070011427 68,15,17,92,8,16, //movups %xmm11,0x10(%rax,%rcx,1)
11428 102,68,15,17,76,8,32, //movupd %xmm9,0x20(%rax,%rcx,1)
11429 68,15,17,84,8,48, //movups %xmm10,0x30(%rax,%rcx,1)
Mike Klein894d5612017-03-07 07:59:52 -050011430 72,173, //lods %ds:(%rsi),%rax
11431 255,224, //jmpq *%rax
11432};
11433
11434CODE const uint8_t sk_clamp_x_sse41[] = {
11435 72,173, //lods %ds:(%rsi),%rax
11436 69,15,87,192, //xorps %xmm8,%xmm8
11437 68,15,95,192, //maxps %xmm0,%xmm8
11438 243,68,15,16,8, //movss (%rax),%xmm9
11439 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11440 102,15,118,192, //pcmpeqd %xmm0,%xmm0
11441 102,65,15,254,193, //paddd %xmm9,%xmm0
11442 68,15,93,192, //minps %xmm0,%xmm8
11443 72,173, //lods %ds:(%rsi),%rax
11444 65,15,40,192, //movaps %xmm8,%xmm0
11445 255,224, //jmpq *%rax
11446};
11447
11448CODE const uint8_t sk_clamp_y_sse41[] = {
11449 72,173, //lods %ds:(%rsi),%rax
11450 69,15,87,192, //xorps %xmm8,%xmm8
11451 68,15,95,193, //maxps %xmm1,%xmm8
11452 243,68,15,16,8, //movss (%rax),%xmm9
11453 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11454 102,15,118,201, //pcmpeqd %xmm1,%xmm1
11455 102,65,15,254,201, //paddd %xmm9,%xmm1
11456 68,15,93,193, //minps %xmm1,%xmm8
11457 72,173, //lods %ds:(%rsi),%rax
11458 65,15,40,200, //movaps %xmm8,%xmm1
11459 255,224, //jmpq *%rax
11460};
11461
11462CODE const uint8_t sk_repeat_x_sse41[] = {
11463 72,173, //lods %ds:(%rsi),%rax
11464 243,68,15,16,0, //movss (%rax),%xmm8
11465 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11466 68,15,40,200, //movaps %xmm0,%xmm9
11467 69,15,94,200, //divps %xmm8,%xmm9
11468 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
11469 69,15,89,200, //mulps %xmm8,%xmm9
11470 65,15,92,193, //subps %xmm9,%xmm0
11471 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
11472 102,69,15,254,200, //paddd %xmm8,%xmm9
11473 65,15,93,193, //minps %xmm9,%xmm0
11474 72,173, //lods %ds:(%rsi),%rax
11475 255,224, //jmpq *%rax
11476};
11477
11478CODE const uint8_t sk_repeat_y_sse41[] = {
11479 72,173, //lods %ds:(%rsi),%rax
11480 243,68,15,16,0, //movss (%rax),%xmm8
11481 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11482 68,15,40,201, //movaps %xmm1,%xmm9
11483 69,15,94,200, //divps %xmm8,%xmm9
11484 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
11485 69,15,89,200, //mulps %xmm8,%xmm9
11486 65,15,92,201, //subps %xmm9,%xmm1
11487 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
11488 102,69,15,254,200, //paddd %xmm8,%xmm9
11489 65,15,93,201, //minps %xmm9,%xmm1
11490 72,173, //lods %ds:(%rsi),%rax
11491 255,224, //jmpq *%rax
11492};
11493
11494CODE const uint8_t sk_mirror_x_sse41[] = {
11495 72,173, //lods %ds:(%rsi),%rax
11496 243,68,15,16,0, //movss (%rax),%xmm8
11497 69,15,40,200, //movaps %xmm8,%xmm9
11498 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11499 65,15,92,193, //subps %xmm9,%xmm0
11500 243,69,15,88,192, //addss %xmm8,%xmm8
11501 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11502 68,15,40,208, //movaps %xmm0,%xmm10
11503 69,15,94,208, //divps %xmm8,%xmm10
11504 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
11505 69,15,89,208, //mulps %xmm8,%xmm10
11506 65,15,92,194, //subps %xmm10,%xmm0
11507 65,15,92,193, //subps %xmm9,%xmm0
11508 69,15,87,192, //xorps %xmm8,%xmm8
11509 68,15,92,192, //subps %xmm0,%xmm8
11510 65,15,84,192, //andps %xmm8,%xmm0
11511 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
11512 102,69,15,254,193, //paddd %xmm9,%xmm8
11513 65,15,93,192, //minps %xmm8,%xmm0
11514 72,173, //lods %ds:(%rsi),%rax
11515 255,224, //jmpq *%rax
11516};
11517
11518CODE const uint8_t sk_mirror_y_sse41[] = {
11519 72,173, //lods %ds:(%rsi),%rax
11520 243,68,15,16,0, //movss (%rax),%xmm8
11521 69,15,40,200, //movaps %xmm8,%xmm9
11522 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11523 65,15,92,201, //subps %xmm9,%xmm1
11524 243,69,15,88,192, //addss %xmm8,%xmm8
11525 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11526 68,15,40,209, //movaps %xmm1,%xmm10
11527 69,15,94,208, //divps %xmm8,%xmm10
11528 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
11529 69,15,89,208, //mulps %xmm8,%xmm10
11530 65,15,92,202, //subps %xmm10,%xmm1
11531 65,15,92,201, //subps %xmm9,%xmm1
11532 69,15,87,192, //xorps %xmm8,%xmm8
11533 68,15,92,193, //subps %xmm1,%xmm8
11534 65,15,84,200, //andps %xmm8,%xmm1
11535 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
11536 102,69,15,254,193, //paddd %xmm9,%xmm8
11537 65,15,93,200, //minps %xmm8,%xmm1
11538 72,173, //lods %ds:(%rsi),%rax
11539 255,224, //jmpq *%rax
11540};
11541
Mike Kleine9ed07d2017-03-07 12:28:11 -050011542CODE const uint8_t sk_luminance_to_alpha_sse41[] = {
Mike Klein5224f462017-03-07 17:29:54 -050011543 184,208,179,89,62, //mov $0x3e59b3d0,%eax
11544 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050011545 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11546 15,89,216, //mulps %xmm0,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050011547 184,89,23,55,63, //mov $0x3f371759,%eax
11548 102,15,110,192, //movd %eax,%xmm0
11549 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11550 15,89,193, //mulps %xmm1,%xmm0
11551 15,88,195, //addps %xmm3,%xmm0
11552 184,152,221,147,61, //mov $0x3d93dd98,%eax
11553 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050011554 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11555 15,89,218, //mulps %xmm2,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050011556 15,88,216, //addps %xmm0,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050011557 72,173, //lods %ds:(%rsi),%rax
11558 15,87,192, //xorps %xmm0,%xmm0
11559 15,87,201, //xorps %xmm1,%xmm1
11560 15,87,210, //xorps %xmm2,%xmm2
11561 255,224, //jmpq *%rax
11562};
11563
Mike Klein894d5612017-03-07 07:59:52 -050011564CODE const uint8_t sk_matrix_2x3_sse41[] = {
11565 68,15,40,201, //movaps %xmm1,%xmm9
11566 68,15,40,192, //movaps %xmm0,%xmm8
11567 72,173, //lods %ds:(%rsi),%rax
11568 243,15,16,0, //movss (%rax),%xmm0
11569 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11570 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11571 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11572 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11573 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
11574 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11575 69,15,89,209, //mulps %xmm9,%xmm10
11576 69,15,88,211, //addps %xmm11,%xmm10
11577 65,15,89,192, //mulps %xmm8,%xmm0
11578 65,15,88,194, //addps %xmm10,%xmm0
11579 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11580 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
11581 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11582 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
11583 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11584 69,15,89,209, //mulps %xmm9,%xmm10
11585 69,15,88,211, //addps %xmm11,%xmm10
11586 65,15,89,200, //mulps %xmm8,%xmm1
11587 65,15,88,202, //addps %xmm10,%xmm1
11588 72,173, //lods %ds:(%rsi),%rax
11589 255,224, //jmpq *%rax
11590};
11591
11592CODE const uint8_t sk_matrix_3x4_sse41[] = {
11593 68,15,40,201, //movaps %xmm1,%xmm9
11594 68,15,40,192, //movaps %xmm0,%xmm8
11595 72,173, //lods %ds:(%rsi),%rax
11596 243,15,16,0, //movss (%rax),%xmm0
11597 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11598 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11599 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
11600 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11601 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
11602 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11603 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
11604 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11605 68,15,89,218, //mulps %xmm2,%xmm11
11606 69,15,88,220, //addps %xmm12,%xmm11
11607 69,15,89,209, //mulps %xmm9,%xmm10
11608 69,15,88,211, //addps %xmm11,%xmm10
11609 65,15,89,192, //mulps %xmm8,%xmm0
11610 65,15,88,194, //addps %xmm10,%xmm0
11611 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11612 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
11613 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11614 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
11615 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11616 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
11617 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11618 68,15,89,218, //mulps %xmm2,%xmm11
11619 69,15,88,220, //addps %xmm12,%xmm11
11620 69,15,89,209, //mulps %xmm9,%xmm10
11621 69,15,88,211, //addps %xmm11,%xmm10
11622 65,15,89,200, //mulps %xmm8,%xmm1
11623 65,15,88,202, //addps %xmm10,%xmm1
11624 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11625 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11626 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
11627 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11628 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
11629 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11630 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
11631 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11632 68,15,89,226, //mulps %xmm2,%xmm12
11633 69,15,88,229, //addps %xmm13,%xmm12
11634 69,15,89,217, //mulps %xmm9,%xmm11
11635 69,15,88,220, //addps %xmm12,%xmm11
11636 69,15,89,208, //mulps %xmm8,%xmm10
11637 69,15,88,211, //addps %xmm11,%xmm10
11638 72,173, //lods %ds:(%rsi),%rax
11639 65,15,40,210, //movaps %xmm10,%xmm2
11640 255,224, //jmpq *%rax
11641};
11642
Mike Kleine9ed07d2017-03-07 12:28:11 -050011643CODE const uint8_t sk_matrix_4x5_sse41[] = {
11644 68,15,40,201, //movaps %xmm1,%xmm9
11645 68,15,40,192, //movaps %xmm0,%xmm8
11646 72,173, //lods %ds:(%rsi),%rax
11647 243,15,16,0, //movss (%rax),%xmm0
11648 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11649 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11650 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
11651 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11652 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
11653 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11654 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
11655 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11656 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
11657 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11658 68,15,89,227, //mulps %xmm3,%xmm12
11659 69,15,88,229, //addps %xmm13,%xmm12
11660 68,15,89,218, //mulps %xmm2,%xmm11
11661 69,15,88,220, //addps %xmm12,%xmm11
11662 69,15,89,209, //mulps %xmm9,%xmm10
11663 69,15,88,211, //addps %xmm11,%xmm10
11664 65,15,89,192, //mulps %xmm8,%xmm0
11665 65,15,88,194, //addps %xmm10,%xmm0
11666 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11667 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
11668 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11669 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
11670 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11671 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
11672 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11673 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
11674 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11675 68,15,89,227, //mulps %xmm3,%xmm12
11676 69,15,88,229, //addps %xmm13,%xmm12
11677 68,15,89,218, //mulps %xmm2,%xmm11
11678 69,15,88,220, //addps %xmm12,%xmm11
11679 69,15,89,209, //mulps %xmm9,%xmm10
11680 69,15,88,211, //addps %xmm11,%xmm10
11681 65,15,89,200, //mulps %xmm8,%xmm1
11682 65,15,88,202, //addps %xmm10,%xmm1
11683 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11684 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11685 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
11686 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11687 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
11688 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11689 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
11690 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11691 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
11692 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
11693 68,15,89,235, //mulps %xmm3,%xmm13
11694 69,15,88,238, //addps %xmm14,%xmm13
11695 68,15,89,226, //mulps %xmm2,%xmm12
11696 69,15,88,229, //addps %xmm13,%xmm12
11697 69,15,89,217, //mulps %xmm9,%xmm11
11698 69,15,88,220, //addps %xmm12,%xmm11
11699 69,15,89,208, //mulps %xmm8,%xmm10
11700 69,15,88,211, //addps %xmm11,%xmm10
11701 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
11702 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11703 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
11704 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11705 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
11706 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11707 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
11708 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
11709 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
11710 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
11711 68,15,89,243, //mulps %xmm3,%xmm14
11712 69,15,88,247, //addps %xmm15,%xmm14
11713 68,15,89,234, //mulps %xmm2,%xmm13
11714 69,15,88,238, //addps %xmm14,%xmm13
11715 69,15,89,225, //mulps %xmm9,%xmm12
11716 69,15,88,229, //addps %xmm13,%xmm12
11717 69,15,89,216, //mulps %xmm8,%xmm11
11718 69,15,88,220, //addps %xmm12,%xmm11
11719 72,173, //lods %ds:(%rsi),%rax
11720 65,15,40,210, //movaps %xmm10,%xmm2
11721 65,15,40,219, //movaps %xmm11,%xmm3
11722 255,224, //jmpq *%rax
11723};
11724
Mike Klein894d5612017-03-07 07:59:52 -050011725CODE const uint8_t sk_matrix_perspective_sse41[] = {
11726 68,15,40,192, //movaps %xmm0,%xmm8
11727 72,173, //lods %ds:(%rsi),%rax
11728 243,15,16,0, //movss (%rax),%xmm0
11729 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
11730 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11731 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11732 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11733 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11734 68,15,89,201, //mulps %xmm1,%xmm9
11735 69,15,88,202, //addps %xmm10,%xmm9
11736 65,15,89,192, //mulps %xmm8,%xmm0
11737 65,15,88,193, //addps %xmm9,%xmm0
11738 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
11739 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11740 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
11741 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11742 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
11743 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11744 68,15,89,209, //mulps %xmm1,%xmm10
11745 69,15,88,211, //addps %xmm11,%xmm10
11746 69,15,89,200, //mulps %xmm8,%xmm9
11747 69,15,88,202, //addps %xmm10,%xmm9
11748 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
11749 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11750 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
11751 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11752 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
11753 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11754 68,15,89,217, //mulps %xmm1,%xmm11
11755 69,15,88,220, //addps %xmm12,%xmm11
11756 69,15,89,208, //mulps %xmm8,%xmm10
11757 69,15,88,211, //addps %xmm11,%xmm10
11758 65,15,83,202, //rcpps %xmm10,%xmm1
11759 15,89,193, //mulps %xmm1,%xmm0
11760 68,15,89,201, //mulps %xmm1,%xmm9
11761 72,173, //lods %ds:(%rsi),%rax
11762 65,15,40,201, //movaps %xmm9,%xmm1
11763 255,224, //jmpq *%rax
11764};
11765
11766CODE const uint8_t sk_linear_gradient_2stops_sse41[] = {
11767 72,173, //lods %ds:(%rsi),%rax
11768 68,15,16,8, //movups (%rax),%xmm9
11769 15,16,88,16, //movups 0x10(%rax),%xmm3
11770 68,15,40,195, //movaps %xmm3,%xmm8
11771 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11772 65,15,40,201, //movaps %xmm9,%xmm1
11773 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11774 68,15,89,192, //mulps %xmm0,%xmm8
11775 68,15,88,193, //addps %xmm1,%xmm8
11776 15,40,203, //movaps %xmm3,%xmm1
11777 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
11778 65,15,40,209, //movaps %xmm9,%xmm2
11779 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
11780 15,89,200, //mulps %xmm0,%xmm1
11781 15,88,202, //addps %xmm2,%xmm1
11782 15,40,211, //movaps %xmm3,%xmm2
11783 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
11784 69,15,40,209, //movaps %xmm9,%xmm10
11785 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
11786 15,89,208, //mulps %xmm0,%xmm2
11787 65,15,88,210, //addps %xmm10,%xmm2
11788 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
11789 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
11790 15,89,216, //mulps %xmm0,%xmm3
11791 65,15,88,217, //addps %xmm9,%xmm3
11792 72,173, //lods %ds:(%rsi),%rax
11793 65,15,40,192, //movaps %xmm8,%xmm0
11794 255,224, //jmpq *%rax
11795};
11796
11797CODE const uint8_t sk_start_pipeline_sse2[] = {
11798 65,87, //push %r15
11799 65,86, //push %r14
11800 65,85, //push %r13
11801 65,84, //push %r12
11802 86, //push %rsi
11803 87, //push %rdi
11804 83, //push %rbx
11805 72,129,236,160,0,0,0, //sub $0xa0,%rsp
11806 68,15,41,188,36,144,0,0,0, //movaps %xmm15,0x90(%rsp)
11807 68,15,41,180,36,128,0,0,0, //movaps %xmm14,0x80(%rsp)
11808 68,15,41,108,36,112, //movaps %xmm13,0x70(%rsp)
11809 68,15,41,100,36,96, //movaps %xmm12,0x60(%rsp)
11810 68,15,41,92,36,80, //movaps %xmm11,0x50(%rsp)
11811 68,15,41,84,36,64, //movaps %xmm10,0x40(%rsp)
11812 68,15,41,76,36,48, //movaps %xmm9,0x30(%rsp)
11813 68,15,41,68,36,32, //movaps %xmm8,0x20(%rsp)
11814 15,41,124,36,16, //movaps %xmm7,0x10(%rsp)
11815 15,41,52,36, //movaps %xmm6,(%rsp)
11816 77,137,207, //mov %r9,%r15
11817 77,137,198, //mov %r8,%r14
11818 72,137,203, //mov %rcx,%rbx
11819 72,137,214, //mov %rdx,%rsi
11820 72,173, //lods %ds:(%rsi),%rax
11821 73,137,196, //mov %rax,%r12
11822 73,137,245, //mov %rsi,%r13
11823 72,141,67,4, //lea 0x4(%rbx),%rax
11824 76,57,248, //cmp %r15,%rax
11825 118,5, //jbe 73 <_sk_start_pipeline_sse2+0x73>
11826 72,137,216, //mov %rbx,%rax
11827 235,52, //jmp a7 <_sk_start_pipeline_sse2+0xa7>
11828 15,87,192, //xorps %xmm0,%xmm0
11829 15,87,201, //xorps %xmm1,%xmm1
11830 15,87,210, //xorps %xmm2,%xmm2
11831 15,87,219, //xorps %xmm3,%xmm3
11832 15,87,228, //xorps %xmm4,%xmm4
11833 15,87,237, //xorps %xmm5,%xmm5
11834 15,87,246, //xorps %xmm6,%xmm6
11835 15,87,255, //xorps %xmm7,%xmm7
11836 72,137,223, //mov %rbx,%rdi
11837 76,137,238, //mov %r13,%rsi
11838 76,137,242, //mov %r14,%rdx
11839 65,255,212, //callq *%r12
11840 72,141,67,4, //lea 0x4(%rbx),%rax
11841 72,131,195,8, //add $0x8,%rbx
11842 76,57,251, //cmp %r15,%rbx
11843 72,137,195, //mov %rax,%rbx
11844 118,204, //jbe 73 <_sk_start_pipeline_sse2+0x73>
11845 15,40,52,36, //movaps (%rsp),%xmm6
11846 15,40,124,36,16, //movaps 0x10(%rsp),%xmm7
11847 68,15,40,68,36,32, //movaps 0x20(%rsp),%xmm8
11848 68,15,40,76,36,48, //movaps 0x30(%rsp),%xmm9
11849 68,15,40,84,36,64, //movaps 0x40(%rsp),%xmm10
11850 68,15,40,92,36,80, //movaps 0x50(%rsp),%xmm11
11851 68,15,40,100,36,96, //movaps 0x60(%rsp),%xmm12
11852 68,15,40,108,36,112, //movaps 0x70(%rsp),%xmm13
11853 68,15,40,180,36,128,0,0,0, //movaps 0x80(%rsp),%xmm14
11854 68,15,40,188,36,144,0,0,0, //movaps 0x90(%rsp),%xmm15
11855 72,129,196,160,0,0,0, //add $0xa0,%rsp
11856 91, //pop %rbx
11857 95, //pop %rdi
11858 94, //pop %rsi
11859 65,92, //pop %r12
11860 65,93, //pop %r13
11861 65,94, //pop %r14
11862 65,95, //pop %r15
11863 195, //retq
11864};
11865
11866CODE const uint8_t sk_just_return_sse2[] = {
11867 195, //retq
11868};
11869
11870CODE const uint8_t sk_seed_shader_sse2[] = {
11871 72,173, //lods %ds:(%rsi),%rax
11872 102,15,110,199, //movd %edi,%xmm0
11873 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
11874 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011875 185,0,0,0,63, //mov $0x3f000000,%ecx
11876 102,15,110,209, //movd %ecx,%xmm2
11877 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
11878 15,88,202, //addps %xmm2,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050011879 15,16,2, //movups (%rdx),%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011880 15,88,193, //addps %xmm1,%xmm0
11881 102,15,110,8, //movd (%rax),%xmm1
11882 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
11883 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011884 15,88,202, //addps %xmm2,%xmm1
11885 184,0,0,128,63, //mov $0x3f800000,%eax
11886 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050011887 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
11888 72,173, //lods %ds:(%rsi),%rax
11889 15,87,219, //xorps %xmm3,%xmm3
11890 15,87,228, //xorps %xmm4,%xmm4
11891 15,87,237, //xorps %xmm5,%xmm5
11892 15,87,246, //xorps %xmm6,%xmm6
11893 15,87,255, //xorps %xmm7,%xmm7
11894 255,224, //jmpq *%rax
11895};
11896
11897CODE const uint8_t sk_constant_color_sse2[] = {
11898 72,173, //lods %ds:(%rsi),%rax
11899 15,16,24, //movups (%rax),%xmm3
11900 15,40,195, //movaps %xmm3,%xmm0
11901 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11902 15,40,203, //movaps %xmm3,%xmm1
11903 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
11904 15,40,211, //movaps %xmm3,%xmm2
11905 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
11906 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
11907 72,173, //lods %ds:(%rsi),%rax
11908 255,224, //jmpq *%rax
11909};
11910
11911CODE const uint8_t sk_clear_sse2[] = {
11912 72,173, //lods %ds:(%rsi),%rax
11913 15,87,192, //xorps %xmm0,%xmm0
11914 15,87,201, //xorps %xmm1,%xmm1
11915 15,87,210, //xorps %xmm2,%xmm2
11916 15,87,219, //xorps %xmm3,%xmm3
11917 255,224, //jmpq *%rax
11918};
11919
11920CODE const uint8_t sk_plus__sse2[] = {
11921 15,88,196, //addps %xmm4,%xmm0
11922 15,88,205, //addps %xmm5,%xmm1
11923 15,88,214, //addps %xmm6,%xmm2
11924 15,88,223, //addps %xmm7,%xmm3
11925 72,173, //lods %ds:(%rsi),%rax
11926 255,224, //jmpq *%rax
11927};
11928
11929CODE const uint8_t sk_srcover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011930 184,0,0,128,63, //mov $0x3f800000,%eax
11931 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011932 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11933 68,15,92,195, //subps %xmm3,%xmm8
11934 69,15,40,200, //movaps %xmm8,%xmm9
11935 68,15,89,204, //mulps %xmm4,%xmm9
11936 65,15,88,193, //addps %xmm9,%xmm0
11937 69,15,40,200, //movaps %xmm8,%xmm9
11938 68,15,89,205, //mulps %xmm5,%xmm9
11939 65,15,88,201, //addps %xmm9,%xmm1
11940 69,15,40,200, //movaps %xmm8,%xmm9
11941 68,15,89,206, //mulps %xmm6,%xmm9
11942 65,15,88,209, //addps %xmm9,%xmm2
11943 68,15,89,199, //mulps %xmm7,%xmm8
11944 65,15,88,216, //addps %xmm8,%xmm3
11945 72,173, //lods %ds:(%rsi),%rax
11946 255,224, //jmpq *%rax
11947};
11948
11949CODE const uint8_t sk_dstover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011950 184,0,0,128,63, //mov $0x3f800000,%eax
11951 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011952 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11953 68,15,92,199, //subps %xmm7,%xmm8
11954 65,15,89,192, //mulps %xmm8,%xmm0
11955 15,88,196, //addps %xmm4,%xmm0
11956 65,15,89,200, //mulps %xmm8,%xmm1
11957 15,88,205, //addps %xmm5,%xmm1
11958 65,15,89,208, //mulps %xmm8,%xmm2
11959 15,88,214, //addps %xmm6,%xmm2
11960 65,15,89,216, //mulps %xmm8,%xmm3
11961 15,88,223, //addps %xmm7,%xmm3
11962 72,173, //lods %ds:(%rsi),%rax
11963 255,224, //jmpq *%rax
11964};
11965
11966CODE const uint8_t sk_clamp_0_sse2[] = {
11967 69,15,87,192, //xorps %xmm8,%xmm8
11968 65,15,95,192, //maxps %xmm8,%xmm0
11969 65,15,95,200, //maxps %xmm8,%xmm1
11970 65,15,95,208, //maxps %xmm8,%xmm2
11971 65,15,95,216, //maxps %xmm8,%xmm3
11972 72,173, //lods %ds:(%rsi),%rax
11973 255,224, //jmpq *%rax
11974};
11975
11976CODE const uint8_t sk_clamp_1_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011977 184,0,0,128,63, //mov $0x3f800000,%eax
11978 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011979 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11980 65,15,93,192, //minps %xmm8,%xmm0
11981 65,15,93,200, //minps %xmm8,%xmm1
11982 65,15,93,208, //minps %xmm8,%xmm2
11983 65,15,93,216, //minps %xmm8,%xmm3
11984 72,173, //lods %ds:(%rsi),%rax
11985 255,224, //jmpq *%rax
11986};
11987
11988CODE const uint8_t sk_clamp_a_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011989 184,0,0,128,63, //mov $0x3f800000,%eax
11990 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011991 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11992 65,15,93,216, //minps %xmm8,%xmm3
11993 15,93,195, //minps %xmm3,%xmm0
11994 15,93,203, //minps %xmm3,%xmm1
11995 15,93,211, //minps %xmm3,%xmm2
11996 72,173, //lods %ds:(%rsi),%rax
11997 255,224, //jmpq *%rax
11998};
11999
12000CODE const uint8_t sk_set_rgb_sse2[] = {
12001 72,173, //lods %ds:(%rsi),%rax
12002 243,15,16,0, //movss (%rax),%xmm0
12003 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12004 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12005 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12006 243,15,16,80,8, //movss 0x8(%rax),%xmm2
12007 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
12008 72,173, //lods %ds:(%rsi),%rax
12009 255,224, //jmpq *%rax
12010};
12011
12012CODE const uint8_t sk_swap_rb_sse2[] = {
12013 68,15,40,192, //movaps %xmm0,%xmm8
12014 72,173, //lods %ds:(%rsi),%rax
12015 15,40,194, //movaps %xmm2,%xmm0
12016 65,15,40,208, //movaps %xmm8,%xmm2
12017 255,224, //jmpq *%rax
12018};
12019
12020CODE const uint8_t sk_swap_sse2[] = {
12021 68,15,40,195, //movaps %xmm3,%xmm8
12022 68,15,40,202, //movaps %xmm2,%xmm9
12023 68,15,40,209, //movaps %xmm1,%xmm10
12024 68,15,40,216, //movaps %xmm0,%xmm11
12025 72,173, //lods %ds:(%rsi),%rax
12026 15,40,196, //movaps %xmm4,%xmm0
12027 15,40,205, //movaps %xmm5,%xmm1
12028 15,40,214, //movaps %xmm6,%xmm2
12029 15,40,223, //movaps %xmm7,%xmm3
12030 65,15,40,227, //movaps %xmm11,%xmm4
12031 65,15,40,234, //movaps %xmm10,%xmm5
12032 65,15,40,241, //movaps %xmm9,%xmm6
12033 65,15,40,248, //movaps %xmm8,%xmm7
12034 255,224, //jmpq *%rax
12035};
12036
12037CODE const uint8_t sk_move_src_dst_sse2[] = {
12038 72,173, //lods %ds:(%rsi),%rax
12039 15,40,224, //movaps %xmm0,%xmm4
12040 15,40,233, //movaps %xmm1,%xmm5
12041 15,40,242, //movaps %xmm2,%xmm6
12042 15,40,251, //movaps %xmm3,%xmm7
12043 255,224, //jmpq *%rax
12044};
12045
12046CODE const uint8_t sk_move_dst_src_sse2[] = {
12047 72,173, //lods %ds:(%rsi),%rax
12048 15,40,196, //movaps %xmm4,%xmm0
12049 15,40,205, //movaps %xmm5,%xmm1
12050 15,40,214, //movaps %xmm6,%xmm2
12051 15,40,223, //movaps %xmm7,%xmm3
12052 255,224, //jmpq *%rax
12053};
12054
12055CODE const uint8_t sk_premul_sse2[] = {
12056 15,89,195, //mulps %xmm3,%xmm0
12057 15,89,203, //mulps %xmm3,%xmm1
12058 15,89,211, //mulps %xmm3,%xmm2
12059 72,173, //lods %ds:(%rsi),%rax
12060 255,224, //jmpq *%rax
12061};
12062
12063CODE const uint8_t sk_unpremul_sse2[] = {
12064 69,15,87,192, //xorps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012065 184,0,0,128,63, //mov $0x3f800000,%eax
12066 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012067 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12068 68,15,94,203, //divps %xmm3,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -070012069 68,15,194,195,4, //cmpneqps %xmm3,%xmm8
12070 69,15,84,193, //andps %xmm9,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012071 65,15,89,192, //mulps %xmm8,%xmm0
12072 65,15,89,200, //mulps %xmm8,%xmm1
12073 65,15,89,208, //mulps %xmm8,%xmm2
12074 72,173, //lods %ds:(%rsi),%rax
12075 255,224, //jmpq *%rax
12076};
12077
12078CODE const uint8_t sk_from_srgb_sse2[] = {
Mike Klein5224f462017-03-07 17:29:54 -050012079 184,145,131,158,61, //mov $0x3d9e8391,%eax
12080 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012081 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12082 69,15,40,232, //movaps %xmm8,%xmm13
12083 68,15,89,232, //mulps %xmm0,%xmm13
12084 68,15,40,224, //movaps %xmm0,%xmm12
12085 69,15,89,228, //mulps %xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -050012086 184,154,153,153,62, //mov $0x3e99999a,%eax
12087 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012088 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050012089 184,92,143,50,63, //mov $0x3f328f5c,%eax
12090 102,68,15,110,208, //movd %eax,%xmm10
12091 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050012092 69,15,40,241, //movaps %xmm9,%xmm14
12093 68,15,89,240, //mulps %xmm0,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -050012094 69,15,88,242, //addps %xmm10,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -050012095 184,10,215,35,59, //mov $0x3b23d70a,%eax
12096 102,68,15,110,216, //movd %eax,%xmm11
12097 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12098 69,15,89,244, //mulps %xmm12,%xmm14
12099 69,15,88,243, //addps %xmm11,%xmm14
12100 184,174,71,97,61, //mov $0x3d6147ae,%eax
12101 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050012102 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12103 65,15,194,196,1, //cmpltps %xmm12,%xmm0
12104 68,15,84,232, //andps %xmm0,%xmm13
12105 65,15,85,198, //andnps %xmm14,%xmm0
12106 65,15,86,197, //orps %xmm13,%xmm0
12107 69,15,40,232, //movaps %xmm8,%xmm13
12108 68,15,89,233, //mulps %xmm1,%xmm13
12109 68,15,40,241, //movaps %xmm1,%xmm14
12110 69,15,89,246, //mulps %xmm14,%xmm14
12111 69,15,40,249, //movaps %xmm9,%xmm15
12112 68,15,89,249, //mulps %xmm1,%xmm15
Mike Klein894d5612017-03-07 07:59:52 -050012113 69,15,88,250, //addps %xmm10,%xmm15
Mike Klein5224f462017-03-07 17:29:54 -050012114 69,15,89,254, //mulps %xmm14,%xmm15
12115 69,15,88,251, //addps %xmm11,%xmm15
Mike Klein894d5612017-03-07 07:59:52 -050012116 65,15,194,204,1, //cmpltps %xmm12,%xmm1
12117 68,15,84,233, //andps %xmm1,%xmm13
12118 65,15,85,207, //andnps %xmm15,%xmm1
12119 65,15,86,205, //orps %xmm13,%xmm1
12120 68,15,89,194, //mulps %xmm2,%xmm8
12121 68,15,40,234, //movaps %xmm2,%xmm13
12122 69,15,89,237, //mulps %xmm13,%xmm13
12123 68,15,89,202, //mulps %xmm2,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012124 69,15,88,202, //addps %xmm10,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050012125 69,15,89,205, //mulps %xmm13,%xmm9
12126 69,15,88,203, //addps %xmm11,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012127 65,15,194,212,1, //cmpltps %xmm12,%xmm2
12128 68,15,84,194, //andps %xmm2,%xmm8
12129 65,15,85,209, //andnps %xmm9,%xmm2
12130 65,15,86,208, //orps %xmm8,%xmm2
12131 72,173, //lods %ds:(%rsi),%rax
12132 255,224, //jmpq *%rax
12133};
12134
12135CODE const uint8_t sk_to_srgb_sse2[] = {
Mike Klein894d5612017-03-07 07:59:52 -050012136 68,15,82,192, //rsqrtps %xmm0,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050012137 69,15,83,248, //rcpps %xmm8,%xmm15
12138 69,15,82,232, //rsqrtps %xmm8,%xmm13
12139 184,41,92,71,65, //mov $0x41475c29,%eax
12140 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012141 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12142 69,15,40,240, //movaps %xmm8,%xmm14
12143 68,15,89,240, //mulps %xmm0,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -050012144 184,0,0,128,63, //mov $0x3f800000,%eax
12145 102,68,15,110,200, //movd %eax,%xmm9
12146 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12147 184,194,135,210,62, //mov $0x3ed287c2,%eax
12148 102,68,15,110,208, //movd %eax,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050012149 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
Mike Klein5224f462017-03-07 17:29:54 -050012150 184,206,111,48,63, //mov $0x3f306fce,%eax
12151 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050012152 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
Mike Klein5224f462017-03-07 17:29:54 -050012153 184,168,87,202,61, //mov $0x3dca57a8,%eax
12154 53,0,0,0,128, //xor $0x80000000,%eax
12155 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050012156 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
Mike Klein5224f462017-03-07 17:29:54 -050012157 69,15,89,251, //mulps %xmm11,%xmm15
12158 69,15,88,252, //addps %xmm12,%xmm15
12159 69,15,89,234, //mulps %xmm10,%xmm13
12160 69,15,88,239, //addps %xmm15,%xmm13
12161 69,15,40,249, //movaps %xmm9,%xmm15
12162 69,15,93,253, //minps %xmm13,%xmm15
12163 184,4,231,140,59, //mov $0x3b8ce704,%eax
12164 102,68,15,110,232, //movd %eax,%xmm13
Mike Klein894d5612017-03-07 07:59:52 -050012165 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12166 65,15,194,197,1, //cmpltps %xmm13,%xmm0
12167 68,15,84,240, //andps %xmm0,%xmm14
Mike Klein5224f462017-03-07 17:29:54 -050012168 65,15,85,199, //andnps %xmm15,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012169 65,15,86,198, //orps %xmm14,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012170 68,15,82,241, //rsqrtps %xmm1,%xmm14
12171 69,15,83,254, //rcpps %xmm14,%xmm15
12172 69,15,82,246, //rsqrtps %xmm14,%xmm14
12173 69,15,89,251, //mulps %xmm11,%xmm15
12174 69,15,88,252, //addps %xmm12,%xmm15
12175 69,15,89,242, //mulps %xmm10,%xmm14
12176 69,15,88,247, //addps %xmm15,%xmm14
12177 69,15,40,249, //movaps %xmm9,%xmm15
12178 69,15,93,254, //minps %xmm14,%xmm15
12179 69,15,40,240, //movaps %xmm8,%xmm14
12180 68,15,89,241, //mulps %xmm1,%xmm14
Mike Klein894d5612017-03-07 07:59:52 -050012181 65,15,194,205,1, //cmpltps %xmm13,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050012182 68,15,84,241, //andps %xmm1,%xmm14
12183 65,15,85,207, //andnps %xmm15,%xmm1
12184 65,15,86,206, //orps %xmm14,%xmm1
12185 68,15,82,242, //rsqrtps %xmm2,%xmm14
12186 69,15,83,254, //rcpps %xmm14,%xmm15
12187 69,15,89,251, //mulps %xmm11,%xmm15
12188 69,15,88,252, //addps %xmm12,%xmm15
12189 69,15,82,222, //rsqrtps %xmm14,%xmm11
12190 69,15,89,218, //mulps %xmm10,%xmm11
12191 69,15,88,223, //addps %xmm15,%xmm11
12192 69,15,93,203, //minps %xmm11,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012193 68,15,89,194, //mulps %xmm2,%xmm8
12194 65,15,194,213,1, //cmpltps %xmm13,%xmm2
12195 68,15,84,194, //andps %xmm2,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050012196 65,15,85,209, //andnps %xmm9,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012197 65,15,86,208, //orps %xmm8,%xmm2
12198 72,173, //lods %ds:(%rsi),%rax
Mike Klein894d5612017-03-07 07:59:52 -050012199 255,224, //jmpq *%rax
12200};
12201
12202CODE const uint8_t sk_scale_1_float_sse2[] = {
12203 72,173, //lods %ds:(%rsi),%rax
12204 243,68,15,16,0, //movss (%rax),%xmm8
12205 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12206 65,15,89,192, //mulps %xmm8,%xmm0
12207 65,15,89,200, //mulps %xmm8,%xmm1
12208 65,15,89,208, //mulps %xmm8,%xmm2
12209 65,15,89,216, //mulps %xmm8,%xmm3
12210 72,173, //lods %ds:(%rsi),%rax
12211 255,224, //jmpq *%rax
12212};
12213
12214CODE const uint8_t sk_scale_u8_sse2[] = {
12215 72,173, //lods %ds:(%rsi),%rax
12216 72,139,0, //mov (%rax),%rax
12217 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
12218 102,69,15,239,201, //pxor %xmm9,%xmm9
12219 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
12220 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
12221 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012222 184,129,128,128,59, //mov $0x3b808081,%eax
12223 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012224 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12225 69,15,89,200, //mulps %xmm8,%xmm9
12226 65,15,89,193, //mulps %xmm9,%xmm0
12227 65,15,89,201, //mulps %xmm9,%xmm1
12228 65,15,89,209, //mulps %xmm9,%xmm2
12229 65,15,89,217, //mulps %xmm9,%xmm3
12230 72,173, //lods %ds:(%rsi),%rax
12231 255,224, //jmpq *%rax
12232};
12233
12234CODE const uint8_t sk_lerp_1_float_sse2[] = {
12235 72,173, //lods %ds:(%rsi),%rax
12236 243,68,15,16,0, //movss (%rax),%xmm8
12237 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12238 15,92,196, //subps %xmm4,%xmm0
12239 65,15,89,192, //mulps %xmm8,%xmm0
12240 15,88,196, //addps %xmm4,%xmm0
12241 15,92,205, //subps %xmm5,%xmm1
12242 65,15,89,200, //mulps %xmm8,%xmm1
12243 15,88,205, //addps %xmm5,%xmm1
12244 15,92,214, //subps %xmm6,%xmm2
12245 65,15,89,208, //mulps %xmm8,%xmm2
12246 15,88,214, //addps %xmm6,%xmm2
12247 15,92,223, //subps %xmm7,%xmm3
12248 65,15,89,216, //mulps %xmm8,%xmm3
12249 15,88,223, //addps %xmm7,%xmm3
12250 72,173, //lods %ds:(%rsi),%rax
12251 255,224, //jmpq *%rax
12252};
12253
12254CODE const uint8_t sk_lerp_u8_sse2[] = {
12255 72,173, //lods %ds:(%rsi),%rax
12256 72,139,0, //mov (%rax),%rax
12257 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
12258 102,69,15,239,201, //pxor %xmm9,%xmm9
12259 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
12260 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
12261 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012262 184,129,128,128,59, //mov $0x3b808081,%eax
12263 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012264 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12265 69,15,89,200, //mulps %xmm8,%xmm9
12266 15,92,196, //subps %xmm4,%xmm0
12267 65,15,89,193, //mulps %xmm9,%xmm0
12268 15,88,196, //addps %xmm4,%xmm0
12269 15,92,205, //subps %xmm5,%xmm1
12270 65,15,89,201, //mulps %xmm9,%xmm1
12271 15,88,205, //addps %xmm5,%xmm1
12272 15,92,214, //subps %xmm6,%xmm2
12273 65,15,89,209, //mulps %xmm9,%xmm2
12274 15,88,214, //addps %xmm6,%xmm2
12275 15,92,223, //subps %xmm7,%xmm3
12276 65,15,89,217, //mulps %xmm9,%xmm3
12277 15,88,223, //addps %xmm7,%xmm3
12278 72,173, //lods %ds:(%rsi),%rax
12279 255,224, //jmpq *%rax
12280};
12281
12282CODE const uint8_t sk_lerp_565_sse2[] = {
12283 72,173, //lods %ds:(%rsi),%rax
12284 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050012285 243,68,15,126,4,120, //movq (%rax,%rdi,2),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012286 102,15,239,219, //pxor %xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050012287 102,68,15,97,195, //punpcklwd %xmm3,%xmm8
12288 184,0,248,0,0, //mov $0xf800,%eax
12289 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012290 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050012291 102,65,15,219,216, //pand %xmm8,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012292 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050012293 184,8,33,132,55, //mov $0x37842108,%eax
12294 102,68,15,110,208, //movd %eax,%xmm10
12295 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12296 69,15,89,209, //mulps %xmm9,%xmm10
12297 184,224,7,0,0, //mov $0x7e0,%eax
12298 102,15,110,216, //movd %eax,%xmm3
12299 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
12300 102,65,15,219,216, //pand %xmm8,%xmm3
12301 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
12302 184,33,8,2,58, //mov $0x3a020821,%eax
12303 102,68,15,110,216, //movd %eax,%xmm11
12304 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12305 69,15,89,217, //mulps %xmm9,%xmm11
12306 184,31,0,0,0, //mov $0x1f,%eax
12307 102,15,110,216, //movd %eax,%xmm3
12308 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
12309 102,65,15,219,216, //pand %xmm8,%xmm3
12310 68,15,91,195, //cvtdq2ps %xmm3,%xmm8
12311 184,8,33,4,61, //mov $0x3d042108,%eax
12312 102,15,110,216, //movd %eax,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012313 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050012314 65,15,89,216, //mulps %xmm8,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012315 15,92,196, //subps %xmm4,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012316 65,15,89,194, //mulps %xmm10,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012317 15,88,196, //addps %xmm4,%xmm0
12318 15,92,205, //subps %xmm5,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050012319 65,15,89,203, //mulps %xmm11,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050012320 15,88,205, //addps %xmm5,%xmm1
12321 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012322 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012323 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012324 184,0,0,128,63, //mov $0x3f800000,%eax
12325 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012326 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12327 72,173, //lods %ds:(%rsi),%rax
12328 255,224, //jmpq *%rax
12329};
12330
12331CODE const uint8_t sk_load_tables_sse2[] = {
12332 72,173, //lods %ds:(%rsi),%rax
12333 72,139,8, //mov (%rax),%rcx
12334 76,139,64,8, //mov 0x8(%rax),%r8
12335 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050012336 185,255,0,0,0, //mov $0xff,%ecx
12337 102,15,110,193, //movd %ecx,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012338 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
12339 102,69,15,111,200, //movdqa %xmm8,%xmm9
12340 102,65,15,114,209,8, //psrld $0x8,%xmm9
12341 102,68,15,219,200, //pand %xmm0,%xmm9
12342 102,69,15,111,208, //movdqa %xmm8,%xmm10
12343 102,65,15,114,210,16, //psrld $0x10,%xmm10
12344 102,68,15,219,208, //pand %xmm0,%xmm10
12345 102,65,15,219,192, //pand %xmm8,%xmm0
12346 102,15,112,216,78, //pshufd $0x4e,%xmm0,%xmm3
12347 102,72,15,126,217, //movq %xmm3,%rcx
12348 65,137,201, //mov %ecx,%r9d
12349 72,193,233,32, //shr $0x20,%rcx
12350 102,73,15,126,194, //movq %xmm0,%r10
12351 69,137,211, //mov %r10d,%r11d
12352 73,193,234,32, //shr $0x20,%r10
12353 243,67,15,16,28,144, //movss (%r8,%r10,4),%xmm3
12354 243,65,15,16,4,136, //movss (%r8,%rcx,4),%xmm0
12355 15,20,216, //unpcklps %xmm0,%xmm3
12356 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
12357 243,67,15,16,12,136, //movss (%r8,%r9,4),%xmm1
12358 15,20,193, //unpcklps %xmm1,%xmm0
12359 15,20,195, //unpcklps %xmm3,%xmm0
Mike Klein64b97482017-03-14 17:35:04 -070012360 76,139,64,16, //mov 0x10(%rax),%r8
Mike Klein894d5612017-03-07 07:59:52 -050012361 102,65,15,112,201,78, //pshufd $0x4e,%xmm9,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -070012362 102,73,15,126,202, //movq %xmm1,%r10
12363 77,137,209, //mov %r10,%r9
12364 73,193,233,32, //shr $0x20,%r9
12365 102,76,15,126,201, //movq %xmm9,%rcx
12366 65,137,203, //mov %ecx,%r11d
12367 65,129,227,255,255,255,0, //and $0xffffff,%r11d
12368 72,193,233,30, //shr $0x1e,%rcx
12369 65,129,226,255,255,255,0, //and $0xffffff,%r10d
12370 243,65,15,16,28,8, //movss (%r8,%rcx,1),%xmm3
12371 243,67,15,16,12,136, //movss (%r8,%r9,4),%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050012372 15,20,217, //unpcklps %xmm1,%xmm3
Mike Klein64b97482017-03-14 17:35:04 -070012373 243,67,15,16,12,152, //movss (%r8,%r11,4),%xmm1
12374 243,67,15,16,20,144, //movss (%r8,%r10,4),%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012375 15,20,202, //unpcklps %xmm2,%xmm1
12376 15,20,203, //unpcklps %xmm3,%xmm1
Mike Klein64b97482017-03-14 17:35:04 -070012377 76,139,72,24, //mov 0x18(%rax),%r9
Mike Klein894d5612017-03-07 07:59:52 -050012378 102,65,15,112,210,78, //pshufd $0x4e,%xmm10,%xmm2
12379 102,72,15,126,209, //movq %xmm2,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070012380 68,15,183,193, //movzwl %cx,%r8d
Mike Klein894d5612017-03-07 07:59:52 -050012381 72,193,233,32, //shr $0x20,%rcx
Mike Klein64b97482017-03-14 17:35:04 -070012382 102,76,15,126,208, //movq %xmm10,%rax
12383 68,15,183,208, //movzwl %ax,%r10d
12384 72,193,232,30, //shr $0x1e,%rax
12385 243,69,15,16,12,1, //movss (%r9,%rax,1),%xmm9
12386 243,65,15,16,20,137, //movss (%r9,%rcx,4),%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012387 68,15,20,202, //unpcklps %xmm2,%xmm9
Mike Klein64b97482017-03-14 17:35:04 -070012388 243,67,15,16,20,145, //movss (%r9,%r10,4),%xmm2
12389 243,67,15,16,28,129, //movss (%r9,%r8,4),%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012390 15,20,211, //unpcklps %xmm3,%xmm2
12391 65,15,20,209, //unpcklps %xmm9,%xmm2
12392 102,65,15,114,208,24, //psrld $0x18,%xmm8
12393 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050012394 184,129,128,128,59, //mov $0x3b808081,%eax
12395 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012396 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12397 65,15,89,216, //mulps %xmm8,%xmm3
12398 72,173, //lods %ds:(%rsi),%rax
12399 255,224, //jmpq *%rax
12400};
12401
12402CODE const uint8_t sk_load_a8_sse2[] = {
12403 72,173, //lods %ds:(%rsi),%rax
12404 72,139,0, //mov (%rax),%rax
12405 102,15,110,4,56, //movd (%rax,%rdi,1),%xmm0
12406 102,15,239,201, //pxor %xmm1,%xmm1
12407 102,15,96,193, //punpcklbw %xmm1,%xmm0
12408 102,15,97,193, //punpcklwd %xmm1,%xmm0
12409 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012410 184,129,128,128,59, //mov $0x3b808081,%eax
12411 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012412 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12413 15,89,216, //mulps %xmm0,%xmm3
12414 72,173, //lods %ds:(%rsi),%rax
12415 15,87,192, //xorps %xmm0,%xmm0
12416 102,15,239,201, //pxor %xmm1,%xmm1
12417 15,87,210, //xorps %xmm2,%xmm2
12418 255,224, //jmpq *%rax
12419};
12420
12421CODE const uint8_t sk_store_a8_sse2[] = {
12422 72,173, //lods %ds:(%rsi),%rax
12423 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050012424 185,0,0,127,67, //mov $0x437f0000,%ecx
12425 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012426 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12427 68,15,89,195, //mulps %xmm3,%xmm8
12428 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
12429 102,65,15,114,240,16, //pslld $0x10,%xmm8
12430 102,65,15,114,224,16, //psrad $0x10,%xmm8
12431 102,69,15,107,192, //packssdw %xmm8,%xmm8
12432 102,69,15,103,192, //packuswb %xmm8,%xmm8
12433 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
12434 72,173, //lods %ds:(%rsi),%rax
12435 255,224, //jmpq *%rax
12436};
12437
12438CODE const uint8_t sk_load_565_sse2[] = {
12439 72,173, //lods %ds:(%rsi),%rax
12440 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050012441 243,15,126,20,120, //movq (%rax,%rdi,2),%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012442 102,15,239,192, //pxor %xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012443 102,15,97,208, //punpcklwd %xmm0,%xmm2
12444 184,0,248,0,0, //mov $0xf800,%eax
12445 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012446 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012447 102,15,219,194, //pand %xmm2,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012448 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050012449 184,8,33,132,55, //mov $0x37842108,%eax
12450 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012451 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12452 15,89,193, //mulps %xmm1,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012453 184,224,7,0,0, //mov $0x7e0,%eax
12454 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050012455 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050012456 102,15,219,202, //pand %xmm2,%xmm1
12457 15,91,217, //cvtdq2ps %xmm1,%xmm3
12458 184,33,8,2,58, //mov $0x3a020821,%eax
12459 102,15,110,200, //movd %eax,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050012460 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
Mike Klein5224f462017-03-07 17:29:54 -050012461 15,89,203, //mulps %xmm3,%xmm1
12462 184,31,0,0,0, //mov $0x1f,%eax
12463 102,15,110,216, //movd %eax,%xmm3
12464 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
12465 102,15,219,218, //pand %xmm2,%xmm3
12466 15,91,219, //cvtdq2ps %xmm3,%xmm3
12467 184,8,33,4,61, //mov $0x3d042108,%eax
12468 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012469 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -050012470 15,89,211, //mulps %xmm3,%xmm2
12471 184,0,0,128,63, //mov $0x3f800000,%eax
12472 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050012473 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12474 72,173, //lods %ds:(%rsi),%rax
12475 255,224, //jmpq *%rax
12476};
12477
12478CODE const uint8_t sk_store_565_sse2[] = {
12479 72,173, //lods %ds:(%rsi),%rax
12480 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050012481 185,0,0,248,65, //mov $0x41f80000,%ecx
12482 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012483 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050012484 69,15,40,200, //movaps %xmm8,%xmm9
12485 68,15,89,200, //mulps %xmm0,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050012486 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
Mike Klein5224f462017-03-07 17:29:54 -050012487 102,65,15,114,241,11, //pslld $0xb,%xmm9
12488 185,0,0,124,66, //mov $0x427c0000,%ecx
12489 102,68,15,110,209, //movd %ecx,%xmm10
12490 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12491 68,15,89,209, //mulps %xmm1,%xmm10
12492 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
12493 102,65,15,114,242,5, //pslld $0x5,%xmm10
12494 102,69,15,235,209, //por %xmm9,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050012495 68,15,89,194, //mulps %xmm2,%xmm8
12496 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
Mike Klein5224f462017-03-07 17:29:54 -050012497 102,69,15,86,194, //orpd %xmm10,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012498 102,65,15,114,240,16, //pslld $0x10,%xmm8
12499 102,65,15,114,224,16, //psrad $0x10,%xmm8
12500 102,69,15,107,192, //packssdw %xmm8,%xmm8
12501 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
12502 72,173, //lods %ds:(%rsi),%rax
12503 255,224, //jmpq *%rax
12504};
12505
12506CODE const uint8_t sk_load_8888_sse2[] = {
12507 72,173, //lods %ds:(%rsi),%rax
12508 72,139,0, //mov (%rax),%rax
12509 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012510 184,255,0,0,0, //mov $0xff,%eax
12511 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012512 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
12513 102,15,111,203, //movdqa %xmm3,%xmm1
12514 102,15,114,209,8, //psrld $0x8,%xmm1
12515 102,15,219,200, //pand %xmm0,%xmm1
12516 102,15,111,211, //movdqa %xmm3,%xmm2
12517 102,15,114,210,16, //psrld $0x10,%xmm2
12518 102,15,219,208, //pand %xmm0,%xmm2
12519 102,15,219,195, //pand %xmm3,%xmm0
12520 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012521 184,129,128,128,59, //mov $0x3b808081,%eax
12522 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012523 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12524 65,15,89,192, //mulps %xmm8,%xmm0
12525 15,91,201, //cvtdq2ps %xmm1,%xmm1
12526 65,15,89,200, //mulps %xmm8,%xmm1
12527 15,91,210, //cvtdq2ps %xmm2,%xmm2
12528 65,15,89,208, //mulps %xmm8,%xmm2
12529 102,15,114,211,24, //psrld $0x18,%xmm3
12530 15,91,219, //cvtdq2ps %xmm3,%xmm3
12531 65,15,89,216, //mulps %xmm8,%xmm3
12532 72,173, //lods %ds:(%rsi),%rax
12533 255,224, //jmpq *%rax
12534};
12535
12536CODE const uint8_t sk_store_8888_sse2[] = {
12537 72,173, //lods %ds:(%rsi),%rax
12538 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012539 185,0,0,127,67, //mov $0x437f0000,%ecx
12540 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012541 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12542 69,15,40,200, //movaps %xmm8,%xmm9
12543 68,15,89,200, //mulps %xmm0,%xmm9
12544 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
12545 69,15,40,208, //movaps %xmm8,%xmm10
12546 68,15,89,209, //mulps %xmm1,%xmm10
12547 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
12548 102,65,15,114,242,8, //pslld $0x8,%xmm10
12549 102,69,15,235,209, //por %xmm9,%xmm10
12550 69,15,40,200, //movaps %xmm8,%xmm9
12551 68,15,89,202, //mulps %xmm2,%xmm9
12552 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
12553 102,65,15,114,241,16, //pslld $0x10,%xmm9
12554 68,15,89,195, //mulps %xmm3,%xmm8
12555 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
12556 102,65,15,114,240,24, //pslld $0x18,%xmm8
12557 102,69,15,235,193, //por %xmm9,%xmm8
12558 102,69,15,235,194, //por %xmm10,%xmm8
12559 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
12560 72,173, //lods %ds:(%rsi),%rax
12561 255,224, //jmpq *%rax
12562};
12563
12564CODE const uint8_t sk_load_f16_sse2[] = {
12565 72,173, //lods %ds:(%rsi),%rax
12566 72,139,0, //mov (%rax),%rax
12567 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
12568 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
12569 102,15,111,208, //movdqa %xmm0,%xmm2
12570 102,15,97,209, //punpcklwd %xmm1,%xmm2
12571 102,15,105,193, //punpckhwd %xmm1,%xmm0
12572 102,68,15,111,194, //movdqa %xmm2,%xmm8
12573 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
12574 102,15,105,208, //punpckhwd %xmm0,%xmm2
Mike Klein5224f462017-03-07 17:29:54 -050012575 184,0,4,0,4, //mov $0x4000400,%eax
12576 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012577 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
12578 102,15,111,203, //movdqa %xmm3,%xmm1
12579 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
12580 102,65,15,223,200, //pandn %xmm8,%xmm1
12581 102,15,101,218, //pcmpgtw %xmm2,%xmm3
12582 102,15,223,218, //pandn %xmm2,%xmm3
12583 102,69,15,239,192, //pxor %xmm8,%xmm8
12584 102,15,111,193, //movdqa %xmm1,%xmm0
12585 102,65,15,97,192, //punpcklwd %xmm8,%xmm0
12586 102,15,114,240,13, //pslld $0xd,%xmm0
Mike Klein5224f462017-03-07 17:29:54 -050012587 184,0,0,128,119, //mov $0x77800000,%eax
12588 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050012589 102,68,15,112,202,0, //pshufd $0x0,%xmm2,%xmm9
12590 65,15,89,193, //mulps %xmm9,%xmm0
12591 102,65,15,105,200, //punpckhwd %xmm8,%xmm1
12592 102,15,114,241,13, //pslld $0xd,%xmm1
12593 65,15,89,201, //mulps %xmm9,%xmm1
12594 102,15,111,211, //movdqa %xmm3,%xmm2
12595 102,65,15,97,208, //punpcklwd %xmm8,%xmm2
12596 102,15,114,242,13, //pslld $0xd,%xmm2
12597 65,15,89,209, //mulps %xmm9,%xmm2
12598 102,65,15,105,216, //punpckhwd %xmm8,%xmm3
12599 102,15,114,243,13, //pslld $0xd,%xmm3
12600 65,15,89,217, //mulps %xmm9,%xmm3
12601 72,173, //lods %ds:(%rsi),%rax
12602 255,224, //jmpq *%rax
12603};
12604
12605CODE const uint8_t sk_store_f16_sse2[] = {
12606 72,173, //lods %ds:(%rsi),%rax
12607 72,139,0, //mov (%rax),%rax
Mike Klein5224f462017-03-07 17:29:54 -050012608 185,0,0,128,7, //mov $0x7800000,%ecx
12609 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012610 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
12611 102,69,15,111,200, //movdqa %xmm8,%xmm9
12612 68,15,89,200, //mulps %xmm0,%xmm9
12613 102,65,15,114,209,13, //psrld $0xd,%xmm9
12614 102,69,15,111,208, //movdqa %xmm8,%xmm10
12615 68,15,89,209, //mulps %xmm1,%xmm10
12616 102,65,15,114,210,13, //psrld $0xd,%xmm10
12617 102,69,15,111,216, //movdqa %xmm8,%xmm11
12618 68,15,89,218, //mulps %xmm2,%xmm11
12619 102,65,15,114,211,13, //psrld $0xd,%xmm11
12620 68,15,89,195, //mulps %xmm3,%xmm8
12621 102,65,15,114,208,13, //psrld $0xd,%xmm8
12622 102,65,15,115,250,2, //pslldq $0x2,%xmm10
12623 102,69,15,235,209, //por %xmm9,%xmm10
12624 102,65,15,115,248,2, //pslldq $0x2,%xmm8
12625 102,69,15,235,195, //por %xmm11,%xmm8
12626 102,69,15,111,202, //movdqa %xmm10,%xmm9
12627 102,69,15,98,200, //punpckldq %xmm8,%xmm9
12628 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
12629 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
12630 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
12631 72,173, //lods %ds:(%rsi),%rax
12632 255,224, //jmpq *%rax
12633};
12634
12635CODE const uint8_t sk_store_f32_sse2[] = {
12636 72,173, //lods %ds:(%rsi),%rax
12637 72,139,0, //mov (%rax),%rax
12638 72,137,249, //mov %rdi,%rcx
12639 72,193,225,4, //shl $0x4,%rcx
12640 68,15,40,192, //movaps %xmm0,%xmm8
12641 68,15,40,200, //movaps %xmm0,%xmm9
12642 68,15,20,201, //unpcklps %xmm1,%xmm9
12643 68,15,40,210, //movaps %xmm2,%xmm10
12644 68,15,40,218, //movaps %xmm2,%xmm11
12645 68,15,20,219, //unpcklps %xmm3,%xmm11
12646 68,15,21,193, //unpckhps %xmm1,%xmm8
12647 68,15,21,211, //unpckhps %xmm3,%xmm10
12648 69,15,40,225, //movaps %xmm9,%xmm12
12649 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
Mike Klein64b97482017-03-14 17:35:04 -070012650 69,15,18,217, //movhlps %xmm9,%xmm11
12651 69,15,40,200, //movaps %xmm8,%xmm9
12652 102,69,15,20,202, //unpcklpd %xmm10,%xmm9
12653 69,15,18,208, //movhlps %xmm8,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050012654 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
Mike Klein64b97482017-03-14 17:35:04 -070012655 68,15,17,92,8,16, //movups %xmm11,0x10(%rax,%rcx,1)
12656 102,68,15,17,76,8,32, //movupd %xmm9,0x20(%rax,%rcx,1)
12657 68,15,17,84,8,48, //movups %xmm10,0x30(%rax,%rcx,1)
Mike Klein894d5612017-03-07 07:59:52 -050012658 72,173, //lods %ds:(%rsi),%rax
12659 255,224, //jmpq *%rax
12660};
12661
12662CODE const uint8_t sk_clamp_x_sse2[] = {
12663 72,173, //lods %ds:(%rsi),%rax
12664 69,15,87,192, //xorps %xmm8,%xmm8
12665 68,15,95,192, //maxps %xmm0,%xmm8
12666 243,68,15,16,8, //movss (%rax),%xmm9
12667 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12668 102,15,118,192, //pcmpeqd %xmm0,%xmm0
12669 102,65,15,254,193, //paddd %xmm9,%xmm0
12670 68,15,93,192, //minps %xmm0,%xmm8
12671 72,173, //lods %ds:(%rsi),%rax
12672 65,15,40,192, //movaps %xmm8,%xmm0
12673 255,224, //jmpq *%rax
12674};
12675
12676CODE const uint8_t sk_clamp_y_sse2[] = {
12677 72,173, //lods %ds:(%rsi),%rax
12678 69,15,87,192, //xorps %xmm8,%xmm8
12679 68,15,95,193, //maxps %xmm1,%xmm8
12680 243,68,15,16,8, //movss (%rax),%xmm9
12681 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12682 102,15,118,201, //pcmpeqd %xmm1,%xmm1
12683 102,65,15,254,201, //paddd %xmm9,%xmm1
12684 68,15,93,193, //minps %xmm1,%xmm8
12685 72,173, //lods %ds:(%rsi),%rax
12686 65,15,40,200, //movaps %xmm8,%xmm1
12687 255,224, //jmpq *%rax
12688};
12689
12690CODE const uint8_t sk_repeat_x_sse2[] = {
12691 72,173, //lods %ds:(%rsi),%rax
12692 243,68,15,16,0, //movss (%rax),%xmm8
12693 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12694 68,15,40,200, //movaps %xmm0,%xmm9
12695 69,15,94,200, //divps %xmm8,%xmm9
12696 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
12697 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
12698 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012699 184,0,0,128,63, //mov $0x3f800000,%eax
12700 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050012701 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12702 69,15,84,217, //andps %xmm9,%xmm11
12703 69,15,92,211, //subps %xmm11,%xmm10
12704 69,15,89,208, //mulps %xmm8,%xmm10
12705 65,15,92,194, //subps %xmm10,%xmm0
12706 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12707 102,69,15,254,200, //paddd %xmm8,%xmm9
12708 65,15,93,193, //minps %xmm9,%xmm0
12709 72,173, //lods %ds:(%rsi),%rax
12710 255,224, //jmpq *%rax
12711};
12712
12713CODE const uint8_t sk_repeat_y_sse2[] = {
12714 72,173, //lods %ds:(%rsi),%rax
12715 243,68,15,16,0, //movss (%rax),%xmm8
12716 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12717 68,15,40,201, //movaps %xmm1,%xmm9
12718 69,15,94,200, //divps %xmm8,%xmm9
12719 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
12720 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
12721 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012722 184,0,0,128,63, //mov $0x3f800000,%eax
12723 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050012724 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12725 69,15,84,217, //andps %xmm9,%xmm11
12726 69,15,92,211, //subps %xmm11,%xmm10
12727 69,15,89,208, //mulps %xmm8,%xmm10
12728 65,15,92,202, //subps %xmm10,%xmm1
12729 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12730 102,69,15,254,200, //paddd %xmm8,%xmm9
12731 65,15,93,201, //minps %xmm9,%xmm1
12732 72,173, //lods %ds:(%rsi),%rax
12733 255,224, //jmpq *%rax
12734};
12735
// sk_mirror_x_sse2: generated SSE2 machine code for one pipeline stage (do not
// hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: loads one float L via the pointer fetched with lods, then with
//   t = xmm0 - L
// computes
//   xmm0 = abs((t - floor(t / (2*L)) * (2*L)) - L)
// and clamps with minps against the float whose bit pattern is bits(L) - 1.
// Ends by fetching the next pointer from (%rsi) and jumping to it.
// NOTE(review): presumably xmm0 carries x coordinates and L is a tiling limit — confirm against the stage's source.
12736CODE const uint8_t sk_mirror_x_sse2[] = {
12737 72,173, //lods %ds:(%rsi),%rax
12738 243,68,15,16,8, //movss (%rax),%xmm9
12739 69,15,40,193, //movaps %xmm9,%xmm8
12740 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12741 65,15,92,192, //subps %xmm8,%xmm0
// xmm9 becomes 2*L in every lane (scalar add of L to itself, then broadcast).
12742 243,69,15,88,201, //addss %xmm9,%xmm9
12743 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12744 68,15,40,208, //movaps %xmm0,%xmm10
12745 69,15,94,209, //divps %xmm9,%xmm10
// floor(): truncate toward zero, then subtract 1.0f in lanes where the quotient is below its truncation.
12746 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
12747 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
12748 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012749 184,0,0,128,63, //mov $0x3f800000,%eax
12750 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050012751 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12752 69,15,84,226, //andps %xmm10,%xmm12
12753 69,15,87,210, //xorps %xmm10,%xmm10
12754 69,15,92,220, //subps %xmm12,%xmm11
12755 69,15,89,217, //mulps %xmm9,%xmm11
12756 65,15,92,195, //subps %xmm11,%xmm0
12757 65,15,92,192, //subps %xmm8,%xmm0
// abs(): AND the value with its negation (0 - value), which clears the sign bit.
12758 68,15,92,208, //subps %xmm0,%xmm10
12759 65,15,84,194, //andps %xmm10,%xmm0
// All-ones (-1) added to the integer bits of L yields the clamp bound used by minps.
12760 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12761 102,69,15,254,200, //paddd %xmm8,%xmm9
12762 65,15,93,193, //minps %xmm9,%xmm0
12763 72,173, //lods %ds:(%rsi),%rax
12764 255,224, //jmpq *%rax
12765};
12766
// sk_mirror_y_sse2: generated SSE2 machine code for one pipeline stage (do not
// hand-edit the bytes; the trailing comments are the disassembly).
// Same instruction sequence as the x variant but operating on xmm1. Per the
// disassembly: loads one float L via the pointer fetched with lods, then with
//   t = xmm1 - L
// computes
//   xmm1 = abs((t - floor(t / (2*L)) * (2*L)) - L)
// and clamps with minps against the float whose bit pattern is bits(L) - 1.
// Ends by fetching the next pointer from (%rsi) and jumping to it.
// NOTE(review): presumably xmm1 carries y coordinates and L is a tiling limit — confirm against the stage's source.
12767CODE const uint8_t sk_mirror_y_sse2[] = {
12768 72,173, //lods %ds:(%rsi),%rax
12769 243,68,15,16,8, //movss (%rax),%xmm9
12770 69,15,40,193, //movaps %xmm9,%xmm8
12771 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12772 65,15,92,200, //subps %xmm8,%xmm1
// xmm9 becomes 2*L in every lane (scalar add of L to itself, then broadcast).
12773 243,69,15,88,201, //addss %xmm9,%xmm9
12774 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12775 68,15,40,209, //movaps %xmm1,%xmm10
12776 69,15,94,209, //divps %xmm9,%xmm10
// floor(): truncate toward zero, then subtract 1.0f in lanes where the quotient is below its truncation.
12777 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
12778 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
12779 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012780 184,0,0,128,63, //mov $0x3f800000,%eax
12781 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050012782 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12783 69,15,84,226, //andps %xmm10,%xmm12
12784 69,15,87,210, //xorps %xmm10,%xmm10
12785 69,15,92,220, //subps %xmm12,%xmm11
12786 69,15,89,217, //mulps %xmm9,%xmm11
12787 65,15,92,203, //subps %xmm11,%xmm1
12788 65,15,92,200, //subps %xmm8,%xmm1
// abs(): AND the value with its negation (0 - value), which clears the sign bit.
12789 68,15,92,209, //subps %xmm1,%xmm10
12790 65,15,84,202, //andps %xmm10,%xmm1
// All-ones (-1) added to the integer bits of L yields the clamp bound used by minps.
12791 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12792 102,69,15,254,200, //paddd %xmm8,%xmm9
12793 65,15,93,201, //minps %xmm9,%xmm1
12794 72,173, //lods %ds:(%rsi),%rax
12795 255,224, //jmpq *%rax
12796};
12797
// sk_luminance_to_alpha_sse2: generated SSE2 machine code for one pipeline stage
// (do not hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: computes
//   xmm3 = c0*xmm0 + c1*xmm1 + c2*xmm2
// with float constants c0 = 0x3e59b3d0 (~0.2126), c1 = 0x3f371759 (~0.7152),
// c2 = 0x3d93dd98 (~0.0722), then zeroes xmm0, xmm1 and xmm2 and jumps to the
// next pointer fetched from (%rsi).
// NOTE(review): presumably xmm0..xmm2 are r,g,b and xmm3 is alpha — confirm against the stage's source.
Mike Kleine9ed07d2017-03-07 12:28:11 -050012798CODE const uint8_t sk_luminance_to_alpha_sse2[] = {
Mike Klein5224f462017-03-07 17:29:54 -050012799 184,208,179,89,62, //mov $0x3e59b3d0,%eax
12800 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050012801 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12802 15,89,216, //mulps %xmm0,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050012803 184,89,23,55,63, //mov $0x3f371759,%eax
12804 102,15,110,192, //movd %eax,%xmm0
12805 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12806 15,89,193, //mulps %xmm1,%xmm0
12807 15,88,195, //addps %xmm3,%xmm0
12808 184,152,221,147,61, //mov $0x3d93dd98,%eax
12809 102,15,110,216, //movd %eax,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050012810 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12811 15,89,218, //mulps %xmm2,%xmm3
Mike Klein5224f462017-03-07 17:29:54 -050012812 15,88,216, //addps %xmm0,%xmm3
Mike Kleine9ed07d2017-03-07 12:28:11 -050012813 72,173, //lods %ds:(%rsi),%rax
// The three input registers are cleared; only xmm3 carries the result forward.
12814 15,87,192, //xorps %xmm0,%xmm0
12815 15,87,201, //xorps %xmm1,%xmm1
12816 15,87,210, //xorps %xmm2,%xmm2
12817 255,224, //jmpq *%rax
12818};
12819
// sk_matrix_2x3_sse2: generated SSE2 machine code for one pipeline stage (do not
// hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: saves the incoming xmm0/xmm1 as x/y, fetches a pointer m
// with lods, reads six floats m[0..5] from it (byte offsets 0x0..0x14), and computes
//   xmm0 = m[0]*x + m[2]*y + m[4]
//   xmm1 = m[1]*x + m[3]*y + m[5]
// Ends by fetching the next pointer from (%rsi) and jumping to it.
Mike Klein894d5612017-03-07 07:59:52 -050012820CODE const uint8_t sk_matrix_2x3_sse2[] = {
12821 68,15,40,201, //movaps %xmm1,%xmm9
12822 68,15,40,192, //movaps %xmm0,%xmm8
12823 72,173, //lods %ds:(%rsi),%rax
12824 243,15,16,0, //movss (%rax),%xmm0
12825 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12826 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12827 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12828 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12829 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
12830 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12831 69,15,89,209, //mulps %xmm9,%xmm10
12832 69,15,88,211, //addps %xmm11,%xmm10
12833 65,15,89,192, //mulps %xmm8,%xmm0
12834 65,15,88,194, //addps %xmm10,%xmm0
// Second output row: m[1], m[3], m[5].
12835 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12836 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
12837 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12838 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
12839 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12840 69,15,89,209, //mulps %xmm9,%xmm10
12841 69,15,88,211, //addps %xmm11,%xmm10
12842 65,15,89,200, //mulps %xmm8,%xmm1
12843 65,15,88,202, //addps %xmm10,%xmm1
12844 72,173, //lods %ds:(%rsi),%rax
12845 255,224, //jmpq *%rax
12846};
12847
// sk_matrix_3x4_sse2: generated SSE2 machine code for one pipeline stage (do not
// hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: saves the incoming xmm0/xmm1 as x/y (z stays in xmm2),
// fetches a pointer m with lods, reads twelve floats m[0..11] from it
// (byte offsets 0x0..0x2c), and computes
//   xmm0 = m[0]*x + m[3]*y + m[6]*z + m[9]
//   xmm1 = m[1]*x + m[4]*y + m[7]*z + m[10]
//   xmm2 = m[2]*x + m[5]*y + m[8]*z + m[11]
// The third result is built in xmm10 and only copied to xmm2 at the end, since
// xmm2 is still needed as an input until then.
// Ends by jumping to the next pointer fetched from (%rsi).
12848CODE const uint8_t sk_matrix_3x4_sse2[] = {
12849 68,15,40,201, //movaps %xmm1,%xmm9
12850 68,15,40,192, //movaps %xmm0,%xmm8
12851 72,173, //lods %ds:(%rsi),%rax
12852 243,15,16,0, //movss (%rax),%xmm0
12853 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12854 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12855 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
12856 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12857 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
12858 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12859 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
12860 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12861 68,15,89,218, //mulps %xmm2,%xmm11
12862 69,15,88,220, //addps %xmm12,%xmm11
12863 69,15,89,209, //mulps %xmm9,%xmm10
12864 69,15,88,211, //addps %xmm11,%xmm10
12865 65,15,89,192, //mulps %xmm8,%xmm0
12866 65,15,88,194, //addps %xmm10,%xmm0
// Second output: m[1], m[4], m[7], m[10].
12867 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12868 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
12869 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12870 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
12871 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12872 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
12873 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12874 68,15,89,218, //mulps %xmm2,%xmm11
12875 69,15,88,220, //addps %xmm12,%xmm11
12876 69,15,89,209, //mulps %xmm9,%xmm10
12877 69,15,88,211, //addps %xmm11,%xmm10
12878 65,15,89,200, //mulps %xmm8,%xmm1
12879 65,15,88,202, //addps %xmm10,%xmm1
// Third output: m[2], m[5], m[8], m[11], accumulated in xmm10.
12880 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12881 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12882 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
12883 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12884 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
12885 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12886 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
12887 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12888 68,15,89,226, //mulps %xmm2,%xmm12
12889 69,15,88,229, //addps %xmm13,%xmm12
12890 69,15,89,217, //mulps %xmm9,%xmm11
12891 69,15,88,220, //addps %xmm12,%xmm11
12892 69,15,89,208, //mulps %xmm8,%xmm10
12893 69,15,88,211, //addps %xmm11,%xmm10
12894 72,173, //lods %ds:(%rsi),%rax
12895 65,15,40,210, //movaps %xmm10,%xmm2
12896 255,224, //jmpq *%rax
12897};
12898
// sk_matrix_4x5_sse2: generated SSE2 machine code for one pipeline stage (do not
// hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: saves the incoming xmm0/xmm1 as a/b (c and d stay in
// xmm2/xmm3), fetches a pointer m with lods, reads twenty floats m[0..19] from it
// (byte offsets 0x0..0x4c), and computes
//   xmm0 = m[0]*a + m[4]*b + m[8]*c  + m[12]*d + m[16]
//   xmm1 = m[1]*a + m[5]*b + m[9]*c  + m[13]*d + m[17]
//   xmm2 = m[2]*a + m[6]*b + m[10]*c + m[14]*d + m[18]
//   xmm3 = m[3]*a + m[7]*b + m[11]*c + m[15]*d + m[19]
// The last two results are built in xmm10/xmm11 and copied to xmm2/xmm3 only at
// the end, since xmm2/xmm3 are inputs to every row.
// Ends by jumping to the next pointer fetched from (%rsi).
// NOTE(review): presumably the four inputs are r,g,b,a colour channels — confirm against the stage's source.
Mike Kleine9ed07d2017-03-07 12:28:11 -050012899CODE const uint8_t sk_matrix_4x5_sse2[] = {
12900 68,15,40,201, //movaps %xmm1,%xmm9
12901 68,15,40,192, //movaps %xmm0,%xmm8
12902 72,173, //lods %ds:(%rsi),%rax
12903 243,15,16,0, //movss (%rax),%xmm0
12904 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12905 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12906 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
12907 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12908 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
12909 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12910 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
12911 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12912 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
12913 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12914 68,15,89,227, //mulps %xmm3,%xmm12
12915 69,15,88,229, //addps %xmm13,%xmm12
12916 68,15,89,218, //mulps %xmm2,%xmm11
12917 69,15,88,220, //addps %xmm12,%xmm11
12918 69,15,89,209, //mulps %xmm9,%xmm10
12919 69,15,88,211, //addps %xmm11,%xmm10
12920 65,15,89,192, //mulps %xmm8,%xmm0
12921 65,15,88,194, //addps %xmm10,%xmm0
// Second output: m[1], m[5], m[9], m[13], m[17].
12922 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12923 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
12924 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12925 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
12926 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12927 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
12928 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12929 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
12930 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12931 68,15,89,227, //mulps %xmm3,%xmm12
12932 69,15,88,229, //addps %xmm13,%xmm12
12933 68,15,89,218, //mulps %xmm2,%xmm11
12934 69,15,88,220, //addps %xmm12,%xmm11
12935 69,15,89,209, //mulps %xmm9,%xmm10
12936 69,15,88,211, //addps %xmm11,%xmm10
12937 65,15,89,200, //mulps %xmm8,%xmm1
12938 65,15,88,202, //addps %xmm10,%xmm1
// Third output: m[2], m[6], m[10], m[14], m[18], accumulated in xmm10.
12939 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12940 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12941 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
12942 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12943 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
12944 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12945 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
12946 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12947 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
12948 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
12949 68,15,89,235, //mulps %xmm3,%xmm13
12950 69,15,88,238, //addps %xmm14,%xmm13
12951 68,15,89,226, //mulps %xmm2,%xmm12
12952 69,15,88,229, //addps %xmm13,%xmm12
12953 69,15,89,217, //mulps %xmm9,%xmm11
12954 69,15,88,220, //addps %xmm12,%xmm11
12955 69,15,89,208, //mulps %xmm8,%xmm10
12956 69,15,88,211, //addps %xmm11,%xmm10
// Fourth output: m[3], m[7], m[11], m[15], m[19], accumulated in xmm11.
12957 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
12958 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12959 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
12960 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12961 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
12962 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12963 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
12964 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
12965 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
12966 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
12967 68,15,89,243, //mulps %xmm3,%xmm14
12968 69,15,88,247, //addps %xmm15,%xmm14
12969 68,15,89,234, //mulps %xmm2,%xmm13
12970 69,15,88,238, //addps %xmm14,%xmm13
12971 69,15,89,225, //mulps %xmm9,%xmm12
12972 69,15,88,229, //addps %xmm13,%xmm12
12973 69,15,89,216, //mulps %xmm8,%xmm11
12974 69,15,88,220, //addps %xmm12,%xmm11
12975 72,173, //lods %ds:(%rsi),%rax
12976 65,15,40,210, //movaps %xmm10,%xmm2
12977 65,15,40,219, //movaps %xmm11,%xmm3
12978 255,224, //jmpq *%rax
12979};
12980
// sk_matrix_perspective_sse2: generated SSE2 machine code for one pipeline stage
// (do not hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: with x = incoming xmm0 and y = incoming xmm1, fetches a
// pointer m with lods, reads nine floats m[0..8] (byte offsets 0x0..0x20), and computes
//   X = m[0]*x + m[1]*y + m[2]
//   Y = m[3]*x + m[4]*y + m[5]
//   Z = m[6]*x + m[7]*y + m[8]
//   xmm0 = X * rcpps(Z)
//   xmm1 = Y * rcpps(Z)
// rcpps is the SSE approximate-reciprocal instruction, so the divide is approximate.
// Ends by jumping to the next pointer fetched from (%rsi).
Mike Klein894d5612017-03-07 07:59:52 -050012981CODE const uint8_t sk_matrix_perspective_sse2[] = {
12982 68,15,40,192, //movaps %xmm0,%xmm8
12983 72,173, //lods %ds:(%rsi),%rax
12984 243,15,16,0, //movss (%rax),%xmm0
12985 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
12986 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12987 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12988 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12989 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12990 68,15,89,201, //mulps %xmm1,%xmm9
12991 69,15,88,202, //addps %xmm10,%xmm9
12992 65,15,89,192, //mulps %xmm8,%xmm0
12993 65,15,88,193, //addps %xmm9,%xmm0
// Y = m[3]*x + m[4]*y + m[5], accumulated in xmm9.
12994 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
12995 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12996 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
12997 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12998 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
12999 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
13000 68,15,89,209, //mulps %xmm1,%xmm10
13001 69,15,88,211, //addps %xmm11,%xmm10
13002 69,15,89,200, //mulps %xmm8,%xmm9
13003 69,15,88,202, //addps %xmm10,%xmm9
// Z = m[6]*x + m[7]*y + m[8], accumulated in xmm10.
13004 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
13005 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
13006 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
13007 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
13008 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
13009 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
13010 68,15,89,217, //mulps %xmm1,%xmm11
13011 69,15,88,220, //addps %xmm12,%xmm11
13012 69,15,89,208, //mulps %xmm8,%xmm10
13013 69,15,88,211, //addps %xmm11,%xmm10
// Scale X and Y by the approximate reciprocal of Z.
13014 65,15,83,202, //rcpps %xmm10,%xmm1
13015 15,89,193, //mulps %xmm1,%xmm0
13016 68,15,89,201, //mulps %xmm1,%xmm9
13017 72,173, //lods %ds:(%rsi),%rax
13018 65,15,40,201, //movaps %xmm9,%xmm1
13019 255,224, //jmpq *%rax
13020};
13021
// sk_linear_gradient_2stops_sse2: generated SSE2 machine code for one pipeline
// stage (do not hand-edit the bytes; the trailing comments are the disassembly).
// Per the disassembly: fetches a pointer with lods and does two unaligned 16-byte
// loads from it: c0 = four floats at offset 0x0, c1 = four floats at offset 0x10.
// With t = incoming xmm0, each output register i (xmm0..xmm3) becomes
//   c0[i] + t * c1[i]
// where lane i of c0/c1 is broadcast with shufps ($0x00, $0x55, $0xaa, $0xff).
// The xmm0 result is built in xmm8 and copied last, because xmm0 (t) is an
// input to all four computations.
// Ends by jumping to the next pointer fetched from (%rsi).
// NOTE(review): presumably c0 is a start colour and c1 a per-channel delta, with t the gradient position — confirm against the stage's source.
13022CODE const uint8_t sk_linear_gradient_2stops_sse2[] = {
13023 72,173, //lods %ds:(%rsi),%rax
13024 68,15,16,8, //movups (%rax),%xmm9
13025 15,16,88,16, //movups 0x10(%rax),%xmm3
13026 68,15,40,195, //movaps %xmm3,%xmm8
13027 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
13028 65,15,40,201, //movaps %xmm9,%xmm1
13029 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
13030 68,15,89,192, //mulps %xmm0,%xmm8
13031 68,15,88,193, //addps %xmm1,%xmm8
13032 15,40,203, //movaps %xmm3,%xmm1
13033 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
13034 65,15,40,209, //movaps %xmm9,%xmm2
13035 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
13036 15,89,200, //mulps %xmm0,%xmm1
13037 15,88,202, //addps %xmm2,%xmm1
13038 15,40,211, //movaps %xmm3,%xmm2
13039 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
13040 69,15,40,209, //movaps %xmm9,%xmm10
13041 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
13042 15,89,208, //mulps %xmm0,%xmm2
13043 65,15,88,210, //addps %xmm10,%xmm2
13044 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
13045 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
13046 15,89,216, //mulps %xmm0,%xmm3
13047 65,15,88,217, //addps %xmm9,%xmm3
13048 72,173, //lods %ds:(%rsi),%rax
13049 65,15,40,192, //movaps %xmm8,%xmm0
13050 255,224, //jmpq *%rax
13051};
13052#endif