/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
// This file is generated semi-automatically with this command:
//   $ src/jumper/build_stages.py
10
11#include <stdint.h>
12
// CODE is prefixed to every instruction array in this file.  It gives the array
// C linkage and asks the toolchain to place it in a named section:
//   - MSVC:   a section named "code", declared read+execute by the #pragma.
//   - Mach-O: the __TEXT,__text section.
//   - other:  a section spelled ".text#".
// NOTE(review): the trailing '#' in ".text#" is presumably there so that the
// assembler treats whatever the compiler appends after the section name as a
// comment, landing the data in the ordinary executable .text -- confirm against
// the toolchains in use before changing it.
#if defined(_MSC_VER)
    #pragma section("code", read,execute)
    #define CODE extern "C" __declspec(allocate("code"))
#elif defined(__MACH__)
    #define CODE extern "C" __attribute__((section("__TEXT,__text")))
#else
    #define CODE extern "C" __attribute__((section(".text#")))
#endif
21
22#if defined(__aarch64__)
23
24CODE const uint32_t sk_start_pipeline_aarch64[] = {
25 0xa9bd5bf7, //stp x23, x22, [sp, #-48]!
26 0xa90153f5, //stp x21, x20, [sp, #16]
27 0xa9027bf3, //stp x19, x30, [sp, #32]
28 0xaa0103f5, //mov x21, x1
29 0xf84086b7, //ldr x23, [x21], #8
30 0xaa0003f6, //mov x22, x0
31 0xaa0303f3, //mov x19, x3
32 0xaa0203f4, //mov x20, x2
33 0x910012c8, //add x8, x22, #0x4
34 0xeb13011f, //cmp x8, x19
35 0x54000069, //b.ls 34 <sk_start_pipeline_aarch64+0x34> // b.plast
36 0xaa1603e0, //mov x0, x22
37 0x14000012, //b 78 <sk_start_pipeline_aarch64+0x78>
38 0x6f00e400, //movi v0.2d, #0x0
39 0x6f00e401, //movi v1.2d, #0x0
40 0x6f00e402, //movi v2.2d, #0x0
41 0x6f00e403, //movi v3.2d, #0x0
42 0x6f00e404, //movi v4.2d, #0x0
43 0x6f00e405, //movi v5.2d, #0x0
44 0x6f00e406, //movi v6.2d, #0x0
45 0x6f00e407, //movi v7.2d, #0x0
46 0xaa1603e0, //mov x0, x22
47 0xaa1503e1, //mov x1, x21
48 0xaa1403e2, //mov x2, x20
49 0xd63f02e0, //blr x23
50 0x910022c8, //add x8, x22, #0x8
51 0x910012c0, //add x0, x22, #0x4
52 0xeb13011f, //cmp x8, x19
53 0xaa0003f6, //mov x22, x0
54 0x54fffe09, //b.ls 34 <sk_start_pipeline_aarch64+0x34> // b.plast
55 0xa9427bf3, //ldp x19, x30, [sp, #32]
56 0xa94153f5, //ldp x21, x20, [sp, #16]
57 0xa8c35bf7, //ldp x23, x22, [sp], #48
58 0xd65f03c0, //ret
59};
60
61CODE const uint32_t sk_just_return_aarch64[] = {
62 0xd65f03c0, //ret
63};
64
65CODE const uint32_t sk_seed_shader_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050066 0xa8c10c28, //ldp x8, x3, [x1], #16
67 0x3cc14046, //ldur q6, [x2, #20]
Mike Klein894d5612017-03-07 07:59:52 -050068 0x4e040c00, //dup v0.4s, w0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050069 0x4f0167e7, //movi v7.4s, #0x3f, lsl #24
Mike Klein894d5612017-03-07 07:59:52 -050070 0x4d40c901, //ld1r {v1.4s}, [x8]
Mike Klein894d5612017-03-07 07:59:52 -050071 0x4e21d800, //scvtf v0.4s, v0.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050072 0x4e27d400, //fadd v0.4s, v0.4s, v7.4s
73 0x4f03f602, //fmov v2.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -050074 0x4e21d821, //scvtf v1.4s, v1.4s
Mike Klein894d5612017-03-07 07:59:52 -050075 0x6f00e403, //movi v3.2d, #0x0
76 0x6f00e404, //movi v4.2d, #0x0
77 0x6f00e405, //movi v5.2d, #0x0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050078 0x4e26d400, //fadd v0.4s, v0.4s, v6.4s
Mike Klein894d5612017-03-07 07:59:52 -050079 0x6f00e406, //movi v6.2d, #0x0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050080 0x4e27d421, //fadd v1.4s, v1.4s, v7.4s
Mike Klein894d5612017-03-07 07:59:52 -050081 0x6f00e407, //movi v7.2d, #0x0
Mike Klein894d5612017-03-07 07:59:52 -050082 0xd61f0060, //br x3
83};
84
85CODE const uint32_t sk_constant_color_aarch64[] = {
86 0xa8c10c28, //ldp x8, x3, [x1], #16
87 0x3dc00103, //ldr q3, [x8]
88 0x4e040460, //dup v0.4s, v3.s[0]
89 0x4e0c0461, //dup v1.4s, v3.s[1]
90 0x4e140462, //dup v2.4s, v3.s[2]
91 0x4e1c0463, //dup v3.4s, v3.s[3]
92 0xd61f0060, //br x3
93};
94
95CODE const uint32_t sk_clear_aarch64[] = {
96 0xf8408423, //ldr x3, [x1], #8
97 0x6f00e400, //movi v0.2d, #0x0
98 0x6f00e401, //movi v1.2d, #0x0
99 0x6f00e402, //movi v2.2d, #0x0
100 0x6f00e403, //movi v3.2d, #0x0
101 0xd61f0060, //br x3
102};
103
104CODE const uint32_t sk_plus__aarch64[] = {
105 0xf8408423, //ldr x3, [x1], #8
106 0x4e24d400, //fadd v0.4s, v0.4s, v4.4s
107 0x4e25d421, //fadd v1.4s, v1.4s, v5.4s
108 0x4e26d442, //fadd v2.4s, v2.4s, v6.4s
109 0x4e27d463, //fadd v3.4s, v3.4s, v7.4s
110 0xd61f0060, //br x3
111};
112
113CODE const uint32_t sk_srcover_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500114 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500115 0x4f03f610, //fmov v16.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500116 0x4ea3d610, //fsub v16.4s, v16.4s, v3.4s
117 0x4e24ce00, //fmla v0.4s, v16.4s, v4.4s
118 0x4e25ce01, //fmla v1.4s, v16.4s, v5.4s
119 0x4e26ce02, //fmla v2.4s, v16.4s, v6.4s
120 0x4e27ce03, //fmla v3.4s, v16.4s, v7.4s
121 0xd61f0060, //br x3
122};
123
124CODE const uint32_t sk_dstover_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500125 0x4f03f611, //fmov v17.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500126 0xf8408423, //ldr x3, [x1], #8
127 0x4ea41c90, //mov v16.16b, v4.16b
Mike Klein894d5612017-03-07 07:59:52 -0500128 0x4ea7d634, //fsub v20.4s, v17.4s, v7.4s
129 0x4ea51cb1, //mov v17.16b, v5.16b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500130 0x4ea61cd2, //mov v18.16b, v6.16b
Mike Klein894d5612017-03-07 07:59:52 -0500131 0x4ea71cf3, //mov v19.16b, v7.16b
132 0x4e20ce90, //fmla v16.4s, v20.4s, v0.4s
133 0x4e21ce91, //fmla v17.4s, v20.4s, v1.4s
134 0x4e22ce92, //fmla v18.4s, v20.4s, v2.4s
135 0x4e23ce93, //fmla v19.4s, v20.4s, v3.4s
136 0x4eb01e00, //mov v0.16b, v16.16b
137 0x4eb11e21, //mov v1.16b, v17.16b
138 0x4eb21e42, //mov v2.16b, v18.16b
139 0x4eb31e63, //mov v3.16b, v19.16b
140 0xd61f0060, //br x3
141};
142
143CODE const uint32_t sk_clamp_0_aarch64[] = {
144 0xf8408423, //ldr x3, [x1], #8
145 0x6f00e410, //movi v16.2d, #0x0
146 0x4e30f400, //fmax v0.4s, v0.4s, v16.4s
147 0x4e30f421, //fmax v1.4s, v1.4s, v16.4s
148 0x4e30f442, //fmax v2.4s, v2.4s, v16.4s
149 0x4e30f463, //fmax v3.4s, v3.4s, v16.4s
150 0xd61f0060, //br x3
151};
152
153CODE const uint32_t sk_clamp_1_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500154 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500155 0x4f03f610, //fmov v16.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500156 0x4eb0f400, //fmin v0.4s, v0.4s, v16.4s
157 0x4eb0f421, //fmin v1.4s, v1.4s, v16.4s
158 0x4eb0f442, //fmin v2.4s, v2.4s, v16.4s
159 0x4eb0f463, //fmin v3.4s, v3.4s, v16.4s
160 0xd61f0060, //br x3
161};
162
163CODE const uint32_t sk_clamp_a_aarch64[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500164 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500165 0x4f03f610, //fmov v16.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500166 0x4eb0f463, //fmin v3.4s, v3.4s, v16.4s
167 0x4ea3f400, //fmin v0.4s, v0.4s, v3.4s
168 0x4ea3f421, //fmin v1.4s, v1.4s, v3.4s
169 0x4ea3f442, //fmin v2.4s, v2.4s, v3.4s
170 0xd61f0060, //br x3
171};
172
173CODE const uint32_t sk_set_rgb_aarch64[] = {
174 0xa8c10c28, //ldp x8, x3, [x1], #16
175 0xaa0803e9, //mov x9, x8
176 0x4ddfc920, //ld1r {v0.4s}, [x9], #4
177 0x91002108, //add x8, x8, #0x8
178 0x4d40c902, //ld1r {v2.4s}, [x8]
179 0x4d40c921, //ld1r {v1.4s}, [x9]
180 0xd61f0060, //br x3
181};
182
183CODE const uint32_t sk_swap_rb_aarch64[] = {
184 0xf8408423, //ldr x3, [x1], #8
185 0x4ea01c10, //mov v16.16b, v0.16b
186 0x4ea21c40, //mov v0.16b, v2.16b
187 0x4eb01e02, //mov v2.16b, v16.16b
188 0xd61f0060, //br x3
189};
190
191CODE const uint32_t sk_swap_aarch64[] = {
192 0xf8408423, //ldr x3, [x1], #8
193 0x4ea31c70, //mov v16.16b, v3.16b
194 0x4ea21c51, //mov v17.16b, v2.16b
195 0x4ea11c32, //mov v18.16b, v1.16b
196 0x4ea01c13, //mov v19.16b, v0.16b
197 0x4ea41c80, //mov v0.16b, v4.16b
198 0x4ea51ca1, //mov v1.16b, v5.16b
199 0x4ea61cc2, //mov v2.16b, v6.16b
200 0x4ea71ce3, //mov v3.16b, v7.16b
201 0x4eb31e64, //mov v4.16b, v19.16b
202 0x4eb21e45, //mov v5.16b, v18.16b
203 0x4eb11e26, //mov v6.16b, v17.16b
204 0x4eb01e07, //mov v7.16b, v16.16b
205 0xd61f0060, //br x3
206};
207
208CODE const uint32_t sk_move_src_dst_aarch64[] = {
209 0xf8408423, //ldr x3, [x1], #8
210 0x4ea01c04, //mov v4.16b, v0.16b
211 0x4ea11c25, //mov v5.16b, v1.16b
212 0x4ea21c46, //mov v6.16b, v2.16b
213 0x4ea31c67, //mov v7.16b, v3.16b
214 0xd61f0060, //br x3
215};
216
217CODE const uint32_t sk_move_dst_src_aarch64[] = {
218 0xf8408423, //ldr x3, [x1], #8
219 0x4ea41c80, //mov v0.16b, v4.16b
220 0x4ea51ca1, //mov v1.16b, v5.16b
221 0x4ea61cc2, //mov v2.16b, v6.16b
222 0x4ea71ce3, //mov v3.16b, v7.16b
223 0xd61f0060, //br x3
224};
225
226CODE const uint32_t sk_premul_aarch64[] = {
227 0xf8408423, //ldr x3, [x1], #8
228 0x6e23dc00, //fmul v0.4s, v0.4s, v3.4s
229 0x6e23dc21, //fmul v1.4s, v1.4s, v3.4s
230 0x6e23dc42, //fmul v2.4s, v2.4s, v3.4s
231 0xd61f0060, //br x3
232};
233
234CODE const uint32_t sk_unpremul_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500235 0x4f03f611, //fmov v17.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500236 0xf8408423, //ldr x3, [x1], #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500237 0x4ea0d870, //fcmeq v16.4s, v3.4s, #0.0
238 0x6e23fe31, //fdiv v17.4s, v17.4s, v3.4s
239 0x4e701e30, //bic v16.16b, v17.16b, v16.16b
Mike Klein894d5612017-03-07 07:59:52 -0500240 0x6e20de00, //fmul v0.4s, v16.4s, v0.4s
241 0x6e21de01, //fmul v1.4s, v16.4s, v1.4s
242 0x6e22de02, //fmul v2.4s, v16.4s, v2.4s
243 0xd61f0060, //br x3
244};
245
246CODE const uint32_t sk_from_srgb_aarch64[] = {
247 0x9100e048, //add x8, x2, #0x38
248 0x4d40c910, //ld1r {v16.4s}, [x8]
249 0x9100d048, //add x8, x2, #0x34
250 0x2d47cc52, //ldp s18, s19, [x2, #60]
251 0x4d40c911, //ld1r {v17.4s}, [x8]
252 0x6e22dc54, //fmul v20.4s, v2.4s, v2.4s
253 0x4eb01e15, //mov v21.16b, v16.16b
254 0x4eb01e17, //mov v23.16b, v16.16b
255 0x4f921050, //fmla v16.4s, v2.4s, v18.s[0]
256 0x4eb11e36, //mov v22.16b, v17.16b
257 0x4eb11e38, //mov v24.16b, v17.16b
258 0x4e34ce11, //fmla v17.4s, v16.4s, v20.4s
259 0x6e20dc10, //fmul v16.4s, v0.4s, v0.4s
260 0x91011048, //add x8, x2, #0x44
261 0x4f921015, //fmla v21.4s, v0.4s, v18.s[0]
262 0x4e30ceb6, //fmla v22.4s, v21.4s, v16.4s
263 0x4d40c910, //ld1r {v16.4s}, [x8]
264 0xf8408423, //ldr x3, [x1], #8
265 0x6e21dc34, //fmul v20.4s, v1.4s, v1.4s
266 0x4f921037, //fmla v23.4s, v1.4s, v18.s[0]
267 0x4f939015, //fmul v21.4s, v0.4s, v19.s[0]
268 0x4f939032, //fmul v18.4s, v1.4s, v19.s[0]
269 0x4f939053, //fmul v19.4s, v2.4s, v19.s[0]
270 0x6ea0e600, //fcmgt v0.4s, v16.4s, v0.4s
271 0x6ea1e601, //fcmgt v1.4s, v16.4s, v1.4s
272 0x6ea2e602, //fcmgt v2.4s, v16.4s, v2.4s
273 0x4e34cef8, //fmla v24.4s, v23.4s, v20.4s
274 0x6e761ea0, //bsl v0.16b, v21.16b, v22.16b
275 0x6e781e41, //bsl v1.16b, v18.16b, v24.16b
276 0x6e711e62, //bsl v2.16b, v19.16b, v17.16b
277 0xd61f0060, //br x3
278};
279
280CODE const uint32_t sk_to_srgb_aarch64[] = {
281 0x6ea1d811, //frsqrte v17.4s, v0.4s
282 0x6ea1d835, //frsqrte v21.4s, v1.4s
283 0x6e31de37, //fmul v23.4s, v17.4s, v17.4s
284 0x6ea1d856, //frsqrte v22.4s, v2.4s
285 0x6e35deb9, //fmul v25.4s, v21.4s, v21.4s
286 0x4eb7fc17, //frsqrts v23.4s, v0.4s, v23.4s
287 0x91015048, //add x8, x2, #0x54
288 0x6e36deda, //fmul v26.4s, v22.4s, v22.4s
289 0x4eb9fc39, //frsqrts v25.4s, v1.4s, v25.4s
290 0x6e37de31, //fmul v17.4s, v17.4s, v23.4s
291 0x4d40c914, //ld1r {v20.4s}, [x8]
292 0x4ebafc5a, //frsqrts v26.4s, v2.4s, v26.4s
293 0x6e39deb5, //fmul v21.4s, v21.4s, v25.4s
294 0x4ea1da37, //frecpe v23.4s, v17.4s
295 0xbd405053, //ldr s19, [x2, #80]
296 0x91016048, //add x8, x2, #0x58
297 0x6e3aded6, //fmul v22.4s, v22.4s, v26.4s
298 0x4ea1dabb, //frecpe v27.4s, v21.4s
299 0x4e37fe3d, //frecps v29.4s, v17.4s, v23.4s
300 0x2d494052, //ldp s18, s16, [x2, #72]
301 0x4d40c918, //ld1r {v24.4s}, [x8]
302 0x4ea1dadc, //frecpe v28.4s, v22.4s
303 0x6e3ddef7, //fmul v23.4s, v23.4s, v29.4s
304 0x4e3bfebd, //frecps v29.4s, v21.4s, v27.4s
305 0x6e3ddf7b, //fmul v27.4s, v27.4s, v29.4s
306 0x4e3cfedd, //frecps v29.4s, v22.4s, v28.4s
307 0x6e3ddf9c, //fmul v28.4s, v28.4s, v29.4s
308 0x4eb41e9d, //mov v29.16b, v20.16b
309 0x6ea1da39, //frsqrte v25.4s, v17.4s
310 0x4f9312fd, //fmla v29.4s, v23.4s, v19.s[0]
311 0x4eb41e97, //mov v23.16b, v20.16b
312 0x4f92901a, //fmul v26.4s, v0.4s, v18.s[0]
313 0x4f931377, //fmla v23.4s, v27.4s, v19.s[0]
314 0x4f931394, //fmla v20.4s, v28.4s, v19.s[0]
315 0x4f929033, //fmul v19.4s, v1.4s, v18.s[0]
316 0x4f929052, //fmul v18.4s, v2.4s, v18.s[0]
317 0x6ea0e700, //fcmgt v0.4s, v24.4s, v0.4s
318 0x6ea1e701, //fcmgt v1.4s, v24.4s, v1.4s
319 0x6ea2e702, //fcmgt v2.4s, v24.4s, v2.4s
320 0x6e39df38, //fmul v24.4s, v25.4s, v25.4s
321 0x6ea1dabb, //frsqrte v27.4s, v21.4s
322 0x4eb8fe31, //frsqrts v17.4s, v17.4s, v24.4s
323 0x6ea1dadc, //frsqrte v28.4s, v22.4s
324 0x6e3bdf78, //fmul v24.4s, v27.4s, v27.4s
325 0x6e31df31, //fmul v17.4s, v25.4s, v17.4s
326 0x4eb8feb5, //frsqrts v21.4s, v21.4s, v24.4s
327 0x6e3cdf98, //fmul v24.4s, v28.4s, v28.4s
328 0x4f90123d, //fmla v29.4s, v17.4s, v16.s[0]
329 0x4d40c851, //ld1r {v17.4s}, [x2]
330 0x4eb8fed6, //frsqrts v22.4s, v22.4s, v24.4s
331 0x6e35df75, //fmul v21.4s, v27.4s, v21.4s
332 0x6e36df96, //fmul v22.4s, v28.4s, v22.4s
333 0xf8408423, //ldr x3, [x1], #8
334 0x4f9012b7, //fmla v23.4s, v21.4s, v16.s[0]
335 0x4f9012d4, //fmla v20.4s, v22.4s, v16.s[0]
336 0x4ebdf630, //fmin v16.4s, v17.4s, v29.4s
337 0x4eb7f635, //fmin v21.4s, v17.4s, v23.4s
338 0x4eb4f631, //fmin v17.4s, v17.4s, v20.4s
339 0x6e701f40, //bsl v0.16b, v26.16b, v16.16b
340 0x6e751e61, //bsl v1.16b, v19.16b, v21.16b
341 0x6e711e42, //bsl v2.16b, v18.16b, v17.16b
342 0xd61f0060, //br x3
343};
344
345CODE const uint32_t sk_scale_1_float_aarch64[] = {
346 0xa8c10c28, //ldp x8, x3, [x1], #16
347 0xbd400110, //ldr s16, [x8]
348 0x4f909000, //fmul v0.4s, v0.4s, v16.s[0]
349 0x4f909021, //fmul v1.4s, v1.4s, v16.s[0]
350 0x4f909042, //fmul v2.4s, v2.4s, v16.s[0]
351 0x4f909063, //fmul v3.4s, v3.4s, v16.s[0]
352 0xd61f0060, //br x3
353};
354
355CODE const uint32_t sk_scale_u8_aarch64[] = {
356 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500357 0x52a77009, //mov w9, #0x3b800000
358 0x72901029, //movk w9, #0x8081
359 0x4e040d30, //dup v16.4s, w9
Mike Klein894d5612017-03-07 07:59:52 -0500360 0xf9400108, //ldr x8, [x8]
361 0x8b000108, //add x8, x8, x0
362 0x39400109, //ldrb w9, [x8]
363 0x3940050a, //ldrb w10, [x8, #1]
364 0x3940090b, //ldrb w11, [x8, #2]
365 0x39400d08, //ldrb w8, [x8, #3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500366 0x4e021d31, //mov v17.h[0], w9
367 0x4e061d51, //mov v17.h[1], w10
368 0x4e0a1d71, //mov v17.h[2], w11
369 0x4e0e1d11, //mov v17.h[3], w8
370 0x2f07b7f1, //bic v17.4h, #0xff, lsl #8
371 0x2f10a631, //uxtl v17.4s, v17.4h
372 0x6e21da31, //ucvtf v17.4s, v17.4s
373 0x6e30de30, //fmul v16.4s, v17.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500374 0x6e20de00, //fmul v0.4s, v16.4s, v0.4s
375 0x6e21de01, //fmul v1.4s, v16.4s, v1.4s
376 0x6e22de02, //fmul v2.4s, v16.4s, v2.4s
377 0x6e23de03, //fmul v3.4s, v16.4s, v3.4s
378 0xd61f0060, //br x3
379};
380
381CODE const uint32_t sk_lerp_1_float_aarch64[] = {
382 0xa8c10c28, //ldp x8, x3, [x1], #16
383 0x4ea4d411, //fsub v17.4s, v0.4s, v4.4s
384 0x4ea41c80, //mov v0.16b, v4.16b
385 0x4ea5d432, //fsub v18.4s, v1.4s, v5.4s
386 0xbd400110, //ldr s16, [x8]
387 0x4ea51ca1, //mov v1.16b, v5.16b
388 0x4f901220, //fmla v0.4s, v17.4s, v16.s[0]
389 0x4ea6d451, //fsub v17.4s, v2.4s, v6.4s
390 0x4f901241, //fmla v1.4s, v18.4s, v16.s[0]
391 0x4ea61cc2, //mov v2.16b, v6.16b
392 0x4ea7d472, //fsub v18.4s, v3.4s, v7.4s
393 0x4ea71ce3, //mov v3.16b, v7.16b
394 0x4f901222, //fmla v2.4s, v17.4s, v16.s[0]
395 0x4f901243, //fmla v3.4s, v18.4s, v16.s[0]
396 0xd61f0060, //br x3
397};
398
399CODE const uint32_t sk_lerp_u8_aarch64[] = {
400 0xa8c10c28, //ldp x8, x3, [x1], #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500401 0x52a77009, //mov w9, #0x3b800000
402 0x72901029, //movk w9, #0x8081
403 0x4e040d30, //dup v16.4s, w9
Mike Klein894d5612017-03-07 07:59:52 -0500404 0xf9400108, //ldr x8, [x8]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500405 0x4ea4d412, //fsub v18.4s, v0.4s, v4.4s
Mike Klein894d5612017-03-07 07:59:52 -0500406 0x8b000108, //add x8, x8, x0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500407 0x3940010a, //ldrb w10, [x8]
408 0x39400509, //ldrb w9, [x8, #1]
Mike Klein894d5612017-03-07 07:59:52 -0500409 0x3940090b, //ldrb w11, [x8, #2]
410 0x39400d08, //ldrb w8, [x8, #3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500411 0x4e021d51, //mov v17.h[0], w10
412 0x4e061d31, //mov v17.h[1], w9
413 0x4e0a1d71, //mov v17.h[2], w11
414 0x4e0e1d11, //mov v17.h[3], w8
415 0x2f07b7f1, //bic v17.4h, #0xff, lsl #8
416 0x2f10a620, //uxtl v0.4s, v17.4h
Mike Klein894d5612017-03-07 07:59:52 -0500417 0x6e21d800, //ucvtf v0.4s, v0.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500418 0x6e30dc10, //fmul v16.4s, v0.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500419 0x4ea41c80, //mov v0.16b, v4.16b
420 0x4ea5d431, //fsub v17.4s, v1.4s, v5.4s
421 0x4ea51ca1, //mov v1.16b, v5.16b
422 0x4e32ce00, //fmla v0.4s, v16.4s, v18.4s
423 0x4ea6d452, //fsub v18.4s, v2.4s, v6.4s
424 0x4e31ce01, //fmla v1.4s, v16.4s, v17.4s
425 0x4ea61cc2, //mov v2.16b, v6.16b
426 0x4ea7d471, //fsub v17.4s, v3.4s, v7.4s
427 0x4ea71ce3, //mov v3.16b, v7.16b
428 0x4e32ce02, //fmla v2.4s, v16.4s, v18.4s
429 0x4e31ce03, //fmla v3.4s, v16.4s, v17.4s
430 0xd61f0060, //br x3
431};
432
433CODE const uint32_t sk_lerp_565_aarch64[] = {
434 0xa8c10c28, //ldp x8, x3, [x1], #16
435 0xd37ff809, //lsl x9, x0, #1
436 0x2d4ec851, //ldp s17, s18, [x2, #116]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500437 0x4ea4d414, //fsub v20.4s, v0.4s, v4.4s
Mike Klein894d5612017-03-07 07:59:52 -0500438 0xf9400108, //ldr x8, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500439 0xfc696903, //ldr d3, [x8, x9]
440 0x9101a048, //add x8, x2, #0x68
441 0x4d40c910, //ld1r {v16.4s}, [x8]
442 0x9101b048, //add x8, x2, #0x6c
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500443 0x4d40c913, //ld1r {v19.4s}, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500444 0x9101c048, //add x8, x2, #0x70
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500445 0x2f10a463, //uxtl v3.4s, v3.4h
446 0x4d40c915, //ld1r {v21.4s}, [x8]
447 0x4e231e00, //and v0.16b, v16.16b, v3.16b
448 0x4e21d800, //scvtf v0.4s, v0.4s
449 0x4f919010, //fmul v16.4s, v0.4s, v17.s[0]
450 0x4ea41c80, //mov v0.16b, v4.16b
451 0xbd407c51, //ldr s17, [x2, #124]
452 0x4e34ce00, //fmla v0.4s, v16.4s, v20.4s
453 0x4e231e70, //and v16.16b, v19.16b, v3.16b
454 0x4e231ea3, //and v3.16b, v21.16b, v3.16b
455 0x4ea5d433, //fsub v19.4s, v1.4s, v5.4s
456 0x4e21da01, //scvtf v1.4s, v16.4s
457 0x4f929030, //fmul v16.4s, v1.4s, v18.s[0]
458 0x4ea6d452, //fsub v18.4s, v2.4s, v6.4s
459 0x4e21d862, //scvtf v2.4s, v3.4s
Mike Klein894d5612017-03-07 07:59:52 -0500460 0x4ea51ca1, //mov v1.16b, v5.16b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500461 0x4f919043, //fmul v3.4s, v2.4s, v17.s[0]
Mike Klein894d5612017-03-07 07:59:52 -0500462 0x4ea61cc2, //mov v2.16b, v6.16b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500463 0x4e33ce01, //fmla v1.4s, v16.4s, v19.4s
464 0x4e32cc62, //fmla v2.4s, v3.4s, v18.4s
465 0x4f03f603, //fmov v3.4s, #1.000000000000000000e+00
Mike Klein894d5612017-03-07 07:59:52 -0500466 0xd61f0060, //br x3
467};
468
469CODE const uint32_t sk_load_tables_aarch64[] = {
470 0xa8c10c28, //ldp x8, x3, [x1], #16
471 0x9100404b, //add x11, x2, #0x10
472 0x4d40c960, //ld1r {v0.4s}, [x11]
473 0xd37ef409, //lsl x9, x0, #2
474 0xa9402d0a, //ldp x10, x11, [x8]
475 0x3ce96942, //ldr q2, [x10, x9]
476 0xa9412109, //ldp x9, x8, [x8, #16]
477 0x4e221c01, //and v1.16b, v0.16b, v2.16b
478 0x0e143c2c, //mov w12, v1.s[2]
479 0xbc6c5971, //ldr s17, [x11, w12, uxtw #2]
480 0x1e26002c, //fmov w12, s1
481 0x6f380443, //ushr v3.4s, v2.4s, #8
482 0x6f300450, //ushr v16.4s, v2.4s, #16
483 0x8b2c496c, //add x12, x11, w12, uxtw #2
484 0x0e0c3c2a, //mov w10, v1.s[1]
485 0x0e1c3c2d, //mov w13, v1.s[3]
486 0x4e231c01, //and v1.16b, v0.16b, v3.16b
487 0x4e301c03, //and v3.16b, v0.16b, v16.16b
488 0x0d408180, //ld1 {v0.s}[0], [x12]
489 0x0e143c2c, //mov w12, v1.s[2]
490 0xbc6c5932, //ldr s18, [x9, w12, uxtw #2]
491 0x1e26002c, //fmov w12, s1
492 0x8b2a496a, //add x10, x11, w10, uxtw #2
493 0xbc6d5970, //ldr s16, [x11, w13, uxtw #2]
494 0x0e0c3c2b, //mov w11, v1.s[1]
495 0x0e1c3c2d, //mov w13, v1.s[3]
496 0x8b2c492c, //add x12, x9, w12, uxtw #2
497 0xbc6d5933, //ldr s19, [x9, w13, uxtw #2]
498 0x0e0c3c6d, //mov w13, v3.s[1]
499 0x8b2b4929, //add x9, x9, w11, uxtw #2
500 0x0e143c6b, //mov w11, v3.s[2]
501 0x0d408181, //ld1 {v1.s}[0], [x12]
502 0x0e1c3c6c, //mov w12, v3.s[3]
503 0x0d409140, //ld1 {v0.s}[1], [x10]
504 0x1e26006a, //fmov w10, s3
505 0xbd400c43, //ldr s3, [x2, #12]
506 0x6f280442, //ushr v2.4s, v2.4s, #24
507 0x4e21d842, //scvtf v2.4s, v2.4s
508 0x8b2a490a, //add x10, x8, w10, uxtw #2
509 0x4f839043, //fmul v3.4s, v2.4s, v3.s[0]
510 0x0d408142, //ld1 {v2.s}[0], [x10]
511 0x8b2d490a, //add x10, x8, w13, uxtw #2
512 0x6e140620, //mov v0.s[2], v17.s[0]
513 0xbc6b5911, //ldr s17, [x8, w11, uxtw #2]
514 0x0d409121, //ld1 {v1.s}[1], [x9]
515 0x0d409142, //ld1 {v2.s}[1], [x10]
516 0x6e1c0600, //mov v0.s[3], v16.s[0]
517 0xbc6c5910, //ldr s16, [x8, w12, uxtw #2]
518 0x6e140641, //mov v1.s[2], v18.s[0]
519 0x6e140622, //mov v2.s[2], v17.s[0]
520 0x6e1c0661, //mov v1.s[3], v19.s[0]
521 0x6e1c0602, //mov v2.s[3], v16.s[0]
522 0xd61f0060, //br x3
523};
524
525CODE const uint32_t sk_load_a8_aarch64[] = {
526 0xa8c10c28, //ldp x8, x3, [x1], #16
527 0xbd400c43, //ldr s3, [x2, #12]
528 0x6f00e400, //movi v0.2d, #0x0
529 0x6f00e401, //movi v1.2d, #0x0
530 0xf9400108, //ldr x8, [x8]
531 0x8b000108, //add x8, x8, x0
532 0x39400109, //ldrb w9, [x8]
533 0x3940050a, //ldrb w10, [x8, #1]
534 0x3940090b, //ldrb w11, [x8, #2]
535 0x39400d08, //ldrb w8, [x8, #3]
536 0x4e021d22, //mov v2.h[0], w9
537 0x4e061d42, //mov v2.h[1], w10
538 0x4e0a1d62, //mov v2.h[2], w11
539 0x4e0e1d02, //mov v2.h[3], w8
540 0x2f07b7e2, //bic v2.4h, #0xff, lsl #8
541 0x2f10a442, //uxtl v2.4s, v2.4h
542 0x6e21d842, //ucvtf v2.4s, v2.4s
543 0x4f839043, //fmul v3.4s, v2.4s, v3.s[0]
544 0x6f00e402, //movi v2.2d, #0x0
545 0xd61f0060, //br x3
546};
547
548CODE const uint32_t sk_store_a8_aarch64[] = {
549 0xf9400028, //ldr x8, [x1]
550 0xbd400850, //ldr s16, [x2, #8]
551 0xf9400108, //ldr x8, [x8]
552 0x4f909070, //fmul v16.4s, v3.4s, v16.s[0]
553 0x6e21aa10, //fcvtnu v16.4s, v16.4s
554 0x0e612a10, //xtn v16.4h, v16.4s
555 0x0e0e3e09, //umov w9, v16.h[3]
556 0x8b000108, //add x8, x8, x0
557 0x39000d09, //strb w9, [x8, #3]
558 0x0e0a3e09, //umov w9, v16.h[2]
559 0x39000909, //strb w9, [x8, #2]
560 0x0e063e09, //umov w9, v16.h[1]
561 0x39000509, //strb w9, [x8, #1]
562 0x0e023e09, //umov w9, v16.h[0]
563 0x39000109, //strb w9, [x8]
564 0xf9400423, //ldr x3, [x1, #8]
565 0x91004021, //add x1, x1, #0x10
566 0xd61f0060, //br x3
567};
568
569CODE const uint32_t sk_load_565_aarch64[] = {
570 0xa8c10c28, //ldp x8, x3, [x1], #16
571 0xd37ff809, //lsl x9, x0, #1
572 0xf9400108, //ldr x8, [x8]
573 0xfc696900, //ldr d0, [x8, x9]
574 0x9101a048, //add x8, x2, #0x68
575 0x4d40c901, //ld1r {v1.4s}, [x8]
576 0x9101b048, //add x8, x2, #0x6c
577 0x4d40c902, //ld1r {v2.4s}, [x8]
578 0x9101c048, //add x8, x2, #0x70
579 0x4d40c903, //ld1r {v3.4s}, [x8]
580 0x2f10a400, //uxtl v0.4s, v0.4h
581 0x4e201c21, //and v1.16b, v1.16b, v0.16b
582 0x4e201c42, //and v2.16b, v2.16b, v0.16b
583 0x4e201c71, //and v17.16b, v3.16b, v0.16b
584 0x2d4e8c50, //ldp s16, s3, [x2, #116]
585 0x4e21d820, //scvtf v0.4s, v1.4s
586 0x4e21d841, //scvtf v1.4s, v2.4s
587 0x4e21da22, //scvtf v2.4s, v17.4s
588 0x4f909000, //fmul v0.4s, v0.4s, v16.s[0]
589 0xbd407c50, //ldr s16, [x2, #124]
590 0x4f839021, //fmul v1.4s, v1.4s, v3.s[0]
591 0x4d40c843, //ld1r {v3.4s}, [x2]
592 0x4f909042, //fmul v2.4s, v2.4s, v16.s[0]
593 0xd61f0060, //br x3
594};
595
596CODE const uint32_t sk_store_565_aarch64[] = {
597 0x2d504450, //ldp s16, s17, [x2, #128]
598 0xf9400028, //ldr x8, [x1]
599 0xd37ff809, //lsl x9, x0, #1
600 0x4f909012, //fmul v18.4s, v0.4s, v16.s[0]
601 0x4f919031, //fmul v17.4s, v1.4s, v17.s[0]
602 0x6e21aa52, //fcvtnu v18.4s, v18.4s
603 0x6e21aa31, //fcvtnu v17.4s, v17.4s
604 0xf9400108, //ldr x8, [x8]
605 0x4f909050, //fmul v16.4s, v2.4s, v16.s[0]
606 0x4f2b5652, //shl v18.4s, v18.4s, #11
607 0x4f255631, //shl v17.4s, v17.4s, #5
608 0x4eb21e31, //orr v17.16b, v17.16b, v18.16b
609 0x6e21aa10, //fcvtnu v16.4s, v16.4s
610 0x4eb01e30, //orr v16.16b, v17.16b, v16.16b
611 0x0e612a10, //xtn v16.4h, v16.4s
612 0xfc296910, //str d16, [x8, x9]
613 0xf9400423, //ldr x3, [x1, #8]
614 0x91004021, //add x1, x1, #0x10
615 0xd61f0060, //br x3
616};
617
618CODE const uint32_t sk_load_8888_aarch64[] = {
619 0xa8c10c28, //ldp x8, x3, [x1], #16
620 0xd37ef409, //lsl x9, x0, #2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500621 0x6f00e621, //movi v1.2d, #0xff000000ff
Mike Klein894d5612017-03-07 07:59:52 -0500622 0xf9400108, //ldr x8, [x8]
623 0x3ce96900, //ldr q0, [x8, x9]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500624 0x52a77008, //mov w8, #0x3b800000
625 0x72901028, //movk w8, #0x8081
626 0x4e040d02, //dup v2.4s, w8
Mike Klein894d5612017-03-07 07:59:52 -0500627 0x6f380410, //ushr v16.4s, v0.4s, #8
628 0x6f300411, //ushr v17.4s, v0.4s, #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500629 0x4e211c03, //and v3.16b, v0.16b, v1.16b
Mike Klein894d5612017-03-07 07:59:52 -0500630 0x6f280400, //ushr v0.4s, v0.4s, #24
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500631 0x4e211e10, //and v16.16b, v16.16b, v1.16b
632 0x4e211e21, //and v1.16b, v17.16b, v1.16b
Mike Klein894d5612017-03-07 07:59:52 -0500633 0x4e21d863, //scvtf v3.4s, v3.4s
634 0x4e21d811, //scvtf v17.4s, v0.4s
635 0x4e21da10, //scvtf v16.4s, v16.4s
636 0x4e21d832, //scvtf v18.4s, v1.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500637 0x6e22dc60, //fmul v0.4s, v3.4s, v2.4s
638 0x6e22de23, //fmul v3.4s, v17.4s, v2.4s
639 0x6e22de01, //fmul v1.4s, v16.4s, v2.4s
640 0x6e22de42, //fmul v2.4s, v18.4s, v2.4s
Mike Klein894d5612017-03-07 07:59:52 -0500641 0xd61f0060, //br x3
642};
643
644CODE const uint32_t sk_store_8888_aarch64[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500645 0x52a86fea, //mov w10, #0x437f0000
646 0x4e040d50, //dup v16.4s, w10
Mike Klein894d5612017-03-07 07:59:52 -0500647 0xf9400028, //ldr x8, [x1]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500648 0x6e30dc32, //fmul v18.4s, v1.4s, v16.4s
649 0x6e30dc11, //fmul v17.4s, v0.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500650 0x6e21aa52, //fcvtnu v18.4s, v18.4s
651 0x6e21aa31, //fcvtnu v17.4s, v17.4s
652 0x4f285652, //shl v18.4s, v18.4s, #8
653 0x4eb11e51, //orr v17.16b, v18.16b, v17.16b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500654 0x6e30dc52, //fmul v18.4s, v2.4s, v16.4s
655 0x6e30dc70, //fmul v16.4s, v3.4s, v16.4s
Mike Klein894d5612017-03-07 07:59:52 -0500656 0x6e21aa52, //fcvtnu v18.4s, v18.4s
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500657 0xf9400108, //ldr x8, [x8]
Mike Klein894d5612017-03-07 07:59:52 -0500658 0x6e21aa10, //fcvtnu v16.4s, v16.4s
659 0x4f305652, //shl v18.4s, v18.4s, #16
660 0x4eb21e31, //orr v17.16b, v17.16b, v18.16b
661 0x4f385610, //shl v16.4s, v16.4s, #24
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500662 0xd37ef409, //lsl x9, x0, #2
Mike Klein894d5612017-03-07 07:59:52 -0500663 0x4eb01e30, //orr v16.16b, v17.16b, v16.16b
664 0x3ca96910, //str q16, [x8, x9]
665 0xf9400423, //ldr x3, [x1, #8]
666 0x91004021, //add x1, x1, #0x10
667 0xd61f0060, //br x3
668};
669
670CODE const uint32_t sk_load_f16_aarch64[] = {
671 0xa8c10c28, //ldp x8, x3, [x1], #16
672 0xf9400108, //ldr x8, [x8]
673 0x8b000d08, //add x8, x8, x0, lsl #3
674 0x0c400510, //ld4 {v16.4h-v19.4h}, [x8]
675 0x0e217a00, //fcvtl v0.4s, v16.4h
676 0x0e217a21, //fcvtl v1.4s, v17.4h
677 0x0e217a42, //fcvtl v2.4s, v18.4h
678 0x0e217a63, //fcvtl v3.4s, v19.4h
679 0xd61f0060, //br x3
680};
681
682CODE const uint32_t sk_store_f16_aarch64[] = {
683 0xf9400028, //ldr x8, [x1]
684 0x0e216810, //fcvtn v16.4h, v0.4s
685 0x0e216831, //fcvtn v17.4h, v1.4s
686 0x0e216852, //fcvtn v18.4h, v2.4s
687 0xf9400108, //ldr x8, [x8]
688 0x0e216873, //fcvtn v19.4h, v3.4s
689 0x8b000d08, //add x8, x8, x0, lsl #3
690 0x0c000510, //st4 {v16.4h-v19.4h}, [x8]
691 0xf9400423, //ldr x3, [x1, #8]
692 0x91004021, //add x1, x1, #0x10
693 0xd61f0060, //br x3
694};
695
696CODE const uint32_t sk_store_f32_aarch64[] = {
697 0xf9400028, //ldr x8, [x1]
698 0xf9400108, //ldr x8, [x8]
699 0x8b001108, //add x8, x8, x0, lsl #4
700 0x4c000900, //st4 {v0.4s-v3.4s}, [x8]
701 0xf9400423, //ldr x3, [x1, #8]
702 0x91004021, //add x1, x1, #0x10
703 0xd61f0060, //br x3
704};
705
706CODE const uint32_t sk_clamp_x_aarch64[] = {
707 0xa8c10c28, //ldp x8, x3, [x1], #16
708 0x6f00e411, //movi v17.2d, #0x0
709 0x4e20f620, //fmax v0.4s, v17.4s, v0.4s
710 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
711 0x4d40c910, //ld1r {v16.4s}, [x8]
712 0x4eb18610, //add v16.4s, v16.4s, v17.4s
713 0x4eb0f400, //fmin v0.4s, v0.4s, v16.4s
714 0xd61f0060, //br x3
715};
716
717CODE const uint32_t sk_clamp_y_aarch64[] = {
718 0xa8c10c28, //ldp x8, x3, [x1], #16
719 0x6f00e411, //movi v17.2d, #0x0
720 0x4e21f621, //fmax v1.4s, v17.4s, v1.4s
721 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
722 0x4d40c910, //ld1r {v16.4s}, [x8]
723 0x4eb18610, //add v16.4s, v16.4s, v17.4s
724 0x4eb0f421, //fmin v1.4s, v1.4s, v16.4s
725 0xd61f0060, //br x3
726};
727
728CODE const uint32_t sk_repeat_x_aarch64[] = {
729 0xa8c10c28, //ldp x8, x3, [x1], #16
730 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
731 0xbd400110, //ldr s16, [x8]
732 0x4e040612, //dup v18.4s, v16.s[0]
733 0x4eb18651, //add v17.4s, v18.4s, v17.4s
734 0x6e32fc12, //fdiv v18.4s, v0.4s, v18.4s
735 0x4e219a52, //frintm v18.4s, v18.4s
736 0x4f905240, //fmls v0.4s, v18.4s, v16.s[0]
737 0x4eb1f400, //fmin v0.4s, v0.4s, v17.4s
738 0xd61f0060, //br x3
739};
740
741CODE const uint32_t sk_repeat_y_aarch64[] = {
742 0xa8c10c28, //ldp x8, x3, [x1], #16
743 0x6f07e7f1, //movi v17.2d, #0xffffffffffffffff
744 0xbd400110, //ldr s16, [x8]
745 0x4e040612, //dup v18.4s, v16.s[0]
746 0x4eb18651, //add v17.4s, v18.4s, v17.4s
747 0x6e32fc32, //fdiv v18.4s, v1.4s, v18.4s
748 0x4e219a52, //frintm v18.4s, v18.4s
749 0x4f905241, //fmls v1.4s, v18.4s, v16.s[0]
750 0x4eb1f421, //fmin v1.4s, v1.4s, v17.4s
751 0xd61f0060, //br x3
752};
753
754CODE const uint32_t sk_mirror_x_aarch64[] = {
755 0xa8c10c28, //ldp x8, x3, [x1], #16
756 0xbd400110, //ldr s16, [x8]
757 0x4e040611, //dup v17.4s, v16.s[0]
758 0x1e302a10, //fadd s16, s16, s16
759 0x4eb1d400, //fsub v0.4s, v0.4s, v17.4s
760 0x4e040612, //dup v18.4s, v16.s[0]
761 0x6e32fc12, //fdiv v18.4s, v0.4s, v18.4s
762 0x4e219a52, //frintm v18.4s, v18.4s
763 0x4f905240, //fmls v0.4s, v18.4s, v16.s[0]
764 0x6f07e7f0, //movi v16.2d, #0xffffffffffffffff
765 0x4eb1d400, //fsub v0.4s, v0.4s, v17.4s
766 0x4eb08630, //add v16.4s, v17.4s, v16.4s
767 0x4ea0f800, //fabs v0.4s, v0.4s
768 0x4eb0f400, //fmin v0.4s, v0.4s, v16.4s
769 0xd61f0060, //br x3
770};
771
772CODE const uint32_t sk_mirror_y_aarch64[] = {
773 0xa8c10c28, //ldp x8, x3, [x1], #16
774 0xbd400110, //ldr s16, [x8]
775 0x4e040611, //dup v17.4s, v16.s[0]
776 0x1e302a10, //fadd s16, s16, s16
777 0x4eb1d421, //fsub v1.4s, v1.4s, v17.4s
778 0x4e040612, //dup v18.4s, v16.s[0]
779 0x6e32fc32, //fdiv v18.4s, v1.4s, v18.4s
780 0x4e219a52, //frintm v18.4s, v18.4s
781 0x4f905241, //fmls v1.4s, v18.4s, v16.s[0]
782 0x6f07e7f0, //movi v16.2d, #0xffffffffffffffff
783 0x4eb1d421, //fsub v1.4s, v1.4s, v17.4s
784 0x4eb08630, //add v16.4s, v17.4s, v16.4s
785 0x4ea0f821, //fabs v1.4s, v1.4s
786 0x4eb0f421, //fmin v1.4s, v1.4s, v16.4s
787 0xd61f0060, //br x3
788};
789
Mike Kleine9ed07d2017-03-07 12:28:11 -0500790CODE const uint32_t sk_luminance_to_alpha_aarch64[] = {
791 0x2d510c50, //ldp s16, s3, [x2, #136]
792 0xbd409051, //ldr s17, [x2, #144]
793 0xf8408423, //ldr x3, [x1], #8
794 0x4f839023, //fmul v3.4s, v1.4s, v3.s[0]
795 0x4f901003, //fmla v3.4s, v0.4s, v16.s[0]
796 0x6f00e400, //movi v0.2d, #0x0
797 0x6f00e401, //movi v1.2d, #0x0
798 0x4f911043, //fmla v3.4s, v2.4s, v17.s[0]
799 0x6f00e402, //movi v2.2d, #0x0
800 0xd61f0060, //br x3
801};
802
Mike Klein894d5612017-03-07 07:59:52 -0500803CODE const uint32_t sk_matrix_2x3_aarch64[] = {
804 0xa8c10c28, //ldp x8, x3, [x1], #16
805 0xaa0803e9, //mov x9, x8
806 0x9100410a, //add x10, x8, #0x10
807 0x4ddfc932, //ld1r {v18.4s}, [x9], #4
808 0x4d40c950, //ld1r {v16.4s}, [x10]
809 0x2d415113, //ldp s19, s20, [x8, #8]
810 0x9100510a, //add x10, x8, #0x14
811 0x4d40c951, //ld1r {v17.4s}, [x10]
812 0x4f931030, //fmla v16.4s, v1.4s, v19.s[0]
813 0xbd400133, //ldr s19, [x9]
814 0x4f941031, //fmla v17.4s, v1.4s, v20.s[0]
815 0x4e20ce50, //fmla v16.4s, v18.4s, v0.4s
816 0x4f931011, //fmla v17.4s, v0.4s, v19.s[0]
817 0x4eb01e00, //mov v0.16b, v16.16b
818 0x4eb11e21, //mov v1.16b, v17.16b
819 0xd61f0060, //br x3
820};
821
822CODE const uint32_t sk_matrix_3x4_aarch64[] = {
823 0xa8c10c28, //ldp x8, x3, [x1], #16
824 0xaa0803e9, //mov x9, x8
825 0x9100910a, //add x10, x8, #0x24
826 0x4ddfc933, //ld1r {v19.4s}, [x9], #4
827 0x4d40c950, //ld1r {v16.4s}, [x10]
828 0x9100a10a, //add x10, x8, #0x28
829 0x4d40c951, //ld1r {v17.4s}, [x10]
830 0x9100b10a, //add x10, x8, #0x2c
831 0x2d435514, //ldp s20, s21, [x8, #24]
832 0xbd402116, //ldr s22, [x8, #32]
833 0x4d40c952, //ld1r {v18.4s}, [x10]
834 0x4f941050, //fmla v16.4s, v2.4s, v20.s[0]
835 0x4f951051, //fmla v17.4s, v2.4s, v21.s[0]
836 0x4f961052, //fmla v18.4s, v2.4s, v22.s[0]
837 0x2d425502, //ldp s2, s21, [x8, #16]
838 0x2d415d14, //ldp s20, s23, [x8, #8]
839 0x4f821031, //fmla v17.4s, v1.4s, v2.s[0]
840 0xbd400122, //ldr s2, [x9]
841 0x4f971030, //fmla v16.4s, v1.4s, v23.s[0]
842 0x4f951032, //fmla v18.4s, v1.4s, v21.s[0]
843 0x4e20ce70, //fmla v16.4s, v19.4s, v0.4s
844 0x4f941012, //fmla v18.4s, v0.4s, v20.s[0]
845 0x4f821011, //fmla v17.4s, v0.4s, v2.s[0]
846 0x4eb01e00, //mov v0.16b, v16.16b
847 0x4eb11e21, //mov v1.16b, v17.16b
848 0x4eb21e42, //mov v2.16b, v18.16b
849 0xd61f0060, //br x3
850};
851
Mike Kleine9ed07d2017-03-07 12:28:11 -0500852CODE const uint32_t sk_matrix_4x5_aarch64[] = {
853 0xf9400029, //ldr x9, [x1]
854 0xaa0903e8, //mov x8, x9
855 0x9101012a, //add x10, x9, #0x40
856 0x4ddfc914, //ld1r {v20.4s}, [x8], #4
857 0x4d40c950, //ld1r {v16.4s}, [x10]
858 0x9101112a, //add x10, x9, #0x44
859 0x4d40c951, //ld1r {v17.4s}, [x10]
860 0x9101212a, //add x10, x9, #0x48
861 0x4d40c952, //ld1r {v18.4s}, [x10]
862 0x2d465533, //ldp s19, s21, [x9, #48]
863 0x2d475d36, //ldp s22, s23, [x9, #56]
864 0x9101312a, //add x10, x9, #0x4c
865 0xf9400423, //ldr x3, [x1, #8]
866 0x4f931070, //fmla v16.4s, v3.4s, v19.s[0]
867 0x4d40c953, //ld1r {v19.4s}, [x10]
868 0x4f951071, //fmla v17.4s, v3.4s, v21.s[0]
869 0x4f961072, //fmla v18.4s, v3.4s, v22.s[0]
870 0x2d445935, //ldp s21, s22, [x9, #32]
871 0x4f971073, //fmla v19.4s, v3.4s, v23.s[0]
872 0x2d455d23, //ldp s3, s23, [x9, #40]
873 0x91004021, //add x1, x1, #0x10
874 0x4f951050, //fmla v16.4s, v2.4s, v21.s[0]
875 0x4f961051, //fmla v17.4s, v2.4s, v22.s[0]
876 0x2d425935, //ldp s21, s22, [x9, #16]
877 0x4f971053, //fmla v19.4s, v2.4s, v23.s[0]
878 0x4f831052, //fmla v18.4s, v2.4s, v3.s[0]
879 0x2d410d22, //ldp s2, s3, [x9, #8]
880 0x4f951030, //fmla v16.4s, v1.4s, v21.s[0]
881 0x2d435d35, //ldp s21, s23, [x9, #24]
882 0x4f961031, //fmla v17.4s, v1.4s, v22.s[0]
883 0xbd400116, //ldr s22, [x8]
884 0x4e20ce90, //fmla v16.4s, v20.4s, v0.4s
885 0x4f951032, //fmla v18.4s, v1.4s, v21.s[0]
886 0x4f971033, //fmla v19.4s, v1.4s, v23.s[0]
887 0x4f821012, //fmla v18.4s, v0.4s, v2.s[0]
888 0x4f831013, //fmla v19.4s, v0.4s, v3.s[0]
889 0x4f961011, //fmla v17.4s, v0.4s, v22.s[0]
890 0x4eb01e00, //mov v0.16b, v16.16b
891 0x4eb11e21, //mov v1.16b, v17.16b
892 0x4eb21e42, //mov v2.16b, v18.16b
893 0x4eb31e63, //mov v3.16b, v19.16b
894 0xd61f0060, //br x3
895};
896
Mike Klein894d5612017-03-07 07:59:52 -0500897CODE const uint32_t sk_matrix_perspective_aarch64[] = {
898 0xa8c10c28, //ldp x8, x3, [x1], #16
899 0xaa0803e9, //mov x9, x8
900 0x9100510a, //add x10, x8, #0x14
901 0x4ddfc930, //ld1r {v16.4s}, [x9], #4
902 0x4d40c951, //ld1r {v17.4s}, [x10]
903 0x9100810a, //add x10, x8, #0x20
904 0x4d40c952, //ld1r {v18.4s}, [x10]
905 0x2d41d113, //ldp s19, s20, [x8, #12]
906 0x2d435915, //ldp s21, s22, [x8, #24]
907 0x91002108, //add x8, x8, #0x8
908 0x4f941031, //fmla v17.4s, v1.4s, v20.s[0]
909 0x4d40c914, //ld1r {v20.4s}, [x8]
910 0x4f961032, //fmla v18.4s, v1.4s, v22.s[0]
911 0xbd400136, //ldr s22, [x9]
912 0x4f951012, //fmla v18.4s, v0.4s, v21.s[0]
913 0x4f931011, //fmla v17.4s, v0.4s, v19.s[0]
914 0x4f961034, //fmla v20.4s, v1.4s, v22.s[0]
915 0x4ea1da41, //frecpe v1.4s, v18.4s
916 0x4e21fe52, //frecps v18.4s, v18.4s, v1.4s
917 0x6e32dc32, //fmul v18.4s, v1.4s, v18.4s
918 0x4e20ce14, //fmla v20.4s, v16.4s, v0.4s
919 0x6e32de21, //fmul v1.4s, v17.4s, v18.4s
920 0x6e32de80, //fmul v0.4s, v20.4s, v18.4s
921 0xd61f0060, //br x3
922};
923
924CODE const uint32_t sk_linear_gradient_2stops_aarch64[] = {
925 0xa8c10c28, //ldp x8, x3, [x1], #16
926 0xad404503, //ldp q3, q17, [x8]
927 0x4e040470, //dup v16.4s, v3.s[0]
928 0x4e0c0461, //dup v1.4s, v3.s[1]
929 0x4e140462, //dup v2.4s, v3.s[2]
930 0x4e1c0463, //dup v3.4s, v3.s[3]
931 0x4f911010, //fmla v16.4s, v0.4s, v17.s[0]
932 0x4fb11001, //fmla v1.4s, v0.4s, v17.s[1]
933 0x4f911802, //fmla v2.4s, v0.4s, v17.s[2]
934 0x4fb11803, //fmla v3.4s, v0.4s, v17.s[3]
935 0x4eb01e00, //mov v0.16b, v16.16b
936 0xd61f0060, //br x3
937};
938#elif defined(__arm__)
939
940CODE const uint32_t sk_start_pipeline_vfp4[] = {
941 0xe92d41f0, //push {r4, r5, r6, r7, r8, lr}
942 0xe1a07001, //mov r7, r1
943 0xe1a04000, //mov r4, r0
944 0xe1a05003, //mov r5, r3
945 0xe1a08002, //mov r8, r2
946 0xe4976004, //ldr r6, [r7], #4
947 0xe2840002, //add r0, r4, #2
948 0xea00000d, //b 58 <sk_start_pipeline_vfp4+0x58>
949 0xf2800010, //vmov.i32 d0, #0
950 0xe1a00004, //mov r0, r4
951 0xf2801010, //vmov.i32 d1, #0
952 0xe1a01007, //mov r1, r7
953 0xf2802010, //vmov.i32 d2, #0
954 0xe1a02008, //mov r2, r8
955 0xf2803010, //vmov.i32 d3, #0
956 0xf2804010, //vmov.i32 d4, #0
957 0xf2805010, //vmov.i32 d5, #0
958 0xf2806010, //vmov.i32 d6, #0
959 0xf2807010, //vmov.i32 d7, #0
960 0xe12fff36, //blx r6
961 0xe2840004, //add r0, r4, #4
962 0xe2844002, //add r4, r4, #2
963 0xe1500005, //cmp r0, r5
964 0x9affffef, //bls 20 <sk_start_pipeline_vfp4+0x20>
965 0xe1a00004, //mov r0, r4
966 0xe8bd81f0, //pop {r4, r5, r6, r7, r8, pc}
967};
968
969CODE const uint32_t sk_just_return_vfp4[] = {
970 0xe12fff1e, //bx lr
971};
972
973CODE const uint32_t sk_seed_shader_vfp4[] = {
Mike Klein894d5612017-03-07 07:59:52 -0500974 0xee800b90, //vdup.32 d16, r0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500975 0xe8911008, //ldm r1, {r3, ip}
Mike Klein894d5612017-03-07 07:59:52 -0500976 0xf3fb0620, //vcvt.f32.s32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500977 0xf2c3161f, //vmov.i32 d17, #1056964608
Mike Klein894d5612017-03-07 07:59:52 -0500978 0xedd23b05, //vldr d19, [r2, #20]
Mike Klein894d5612017-03-07 07:59:52 -0500979 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500980 0xf2872f10, //vmov.f32 d2, #1
981 0xf3fb2622, //vcvt.f32.s32 d18, d18
982 0xe2811008, //add r1, r1, #8
983 0xf2400da1, //vadd.f32 d16, d16, d17
984 0xf2803010, //vmov.i32 d3, #0
Mike Klein894d5612017-03-07 07:59:52 -0500985 0xf2804010, //vmov.i32 d4, #0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -0500986 0xf2021da1, //vadd.f32 d1, d18, d17
987 0xf2000da3, //vadd.f32 d0, d16, d19
Mike Klein894d5612017-03-07 07:59:52 -0500988 0xf2805010, //vmov.i32 d5, #0
Mike Klein894d5612017-03-07 07:59:52 -0500989 0xf2806010, //vmov.i32 d6, #0
Mike Klein894d5612017-03-07 07:59:52 -0500990 0xf2807010, //vmov.i32 d7, #0
991 0xe12fff1c, //bx ip
992};
993
994CODE const uint32_t sk_constant_color_vfp4[] = {
995 0xe8911008, //ldm r1, {r3, ip}
996 0xe2811008, //add r1, r1, #8
997 0xf4630a0f, //vld1.8 {d16-d17}, [r3]
998 0xf3b40c20, //vdup.32 d0, d16[0]
999 0xf3bc1c20, //vdup.32 d1, d16[1]
1000 0xf3b42c21, //vdup.32 d2, d17[0]
1001 0xf3bc3c21, //vdup.32 d3, d17[1]
1002 0xe12fff1c, //bx ip
1003};
1004
1005CODE const uint32_t sk_clear_vfp4[] = {
1006 0xe4913004, //ldr r3, [r1], #4
1007 0xf2800010, //vmov.i32 d0, #0
1008 0xf2801010, //vmov.i32 d1, #0
1009 0xf2802010, //vmov.i32 d2, #0
1010 0xf2803010, //vmov.i32 d3, #0
1011 0xe12fff13, //bx r3
1012};
1013
1014CODE const uint32_t sk_plus__vfp4[] = {
1015 0xf2000d04, //vadd.f32 d0, d0, d4
1016 0xe4913004, //ldr r3, [r1], #4
1017 0xf2011d05, //vadd.f32 d1, d1, d5
1018 0xf2022d06, //vadd.f32 d2, d2, d6
1019 0xf2033d07, //vadd.f32 d3, d3, d7
1020 0xe12fff13, //bx r3
1021};
1022
1023CODE const uint32_t sk_srcover_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001024 0xf2c70f10, //vmov.f32 d16, #1
Mike Klein894d5612017-03-07 07:59:52 -05001025 0xe4913004, //ldr r3, [r1], #4
1026 0xf2600d83, //vsub.f32 d16, d16, d3
1027 0xf2040c30, //vfma.f32 d0, d4, d16
1028 0xf2051c30, //vfma.f32 d1, d5, d16
1029 0xf2062c30, //vfma.f32 d2, d6, d16
1030 0xf2073c30, //vfma.f32 d3, d7, d16
1031 0xe12fff13, //bx r3
1032};
1033
1034CODE const uint32_t sk_dstover_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001035 0xf2c70f10, //vmov.f32 d16, #1
1036 0xe4913004, //ldr r3, [r1], #4
Mike Klein894d5612017-03-07 07:59:52 -05001037 0xf2651115, //vorr d17, d5, d5
1038 0xf2604d87, //vsub.f32 d20, d16, d7
1039 0xf2640114, //vorr d16, d4, d4
1040 0xf2662116, //vorr d18, d6, d6
Mike Klein894d5612017-03-07 07:59:52 -05001041 0xf2673117, //vorr d19, d7, d7
1042 0xf2400c34, //vfma.f32 d16, d0, d20
1043 0xf2411c34, //vfma.f32 d17, d1, d20
1044 0xf2422c34, //vfma.f32 d18, d2, d20
1045 0xf2433c34, //vfma.f32 d19, d3, d20
1046 0xf22001b0, //vorr d0, d16, d16
1047 0xf22111b1, //vorr d1, d17, d17
1048 0xf22221b2, //vorr d2, d18, d18
1049 0xf22331b3, //vorr d3, d19, d19
1050 0xe12fff13, //bx r3
1051};
1052
1053CODE const uint32_t sk_clamp_0_vfp4[] = {
1054 0xf2c00010, //vmov.i32 d16, #0
1055 0xe4913004, //ldr r3, [r1], #4
1056 0xf2000f20, //vmax.f32 d0, d0, d16
1057 0xf2011f20, //vmax.f32 d1, d1, d16
1058 0xf2022f20, //vmax.f32 d2, d2, d16
1059 0xf2033f20, //vmax.f32 d3, d3, d16
1060 0xe12fff13, //bx r3
1061};
1062
1063CODE const uint32_t sk_clamp_1_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001064 0xf2c70f10, //vmov.f32 d16, #1
Mike Klein894d5612017-03-07 07:59:52 -05001065 0xe4913004, //ldr r3, [r1], #4
1066 0xf2200f20, //vmin.f32 d0, d0, d16
1067 0xf2211f20, //vmin.f32 d1, d1, d16
1068 0xf2222f20, //vmin.f32 d2, d2, d16
1069 0xf2233f20, //vmin.f32 d3, d3, d16
1070 0xe12fff13, //bx r3
1071};
1072
1073CODE const uint32_t sk_clamp_a_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001074 0xf2c70f10, //vmov.f32 d16, #1
Mike Klein894d5612017-03-07 07:59:52 -05001075 0xe4913004, //ldr r3, [r1], #4
1076 0xf2233f20, //vmin.f32 d3, d3, d16
1077 0xf2200f03, //vmin.f32 d0, d0, d3
1078 0xf2211f03, //vmin.f32 d1, d1, d3
1079 0xf2222f03, //vmin.f32 d2, d2, d3
1080 0xe12fff13, //bx r3
1081};
1082
1083CODE const uint32_t sk_set_rgb_vfp4[] = {
1084 0xe92d4800, //push {fp, lr}
1085 0xe591e000, //ldr lr, [r1]
1086 0xe591c004, //ldr ip, [r1, #4]
1087 0xe2811008, //add r1, r1, #8
1088 0xe28e3008, //add r3, lr, #8
1089 0xf4ae0c9f, //vld1.32 {d0[]}, [lr :32]
1090 0xf4a32c9f, //vld1.32 {d2[]}, [r3 :32]
1091 0xe28e3004, //add r3, lr, #4
1092 0xf4a31c9f, //vld1.32 {d1[]}, [r3 :32]
1093 0xe8bd4800, //pop {fp, lr}
1094 0xe12fff1c, //bx ip
1095};
1096
1097CODE const uint32_t sk_swap_rb_vfp4[] = {
1098 0xeef00b40, //vmov.f64 d16, d0
1099 0xe4913004, //ldr r3, [r1], #4
1100 0xeeb00b42, //vmov.f64 d0, d2
1101 0xeeb02b60, //vmov.f64 d2, d16
1102 0xe12fff13, //bx r3
1103};
1104
1105CODE const uint32_t sk_swap_vfp4[] = {
1106 0xeef00b43, //vmov.f64 d16, d3
1107 0xe4913004, //ldr r3, [r1], #4
1108 0xeef01b42, //vmov.f64 d17, d2
1109 0xeef02b41, //vmov.f64 d18, d1
1110 0xeef03b40, //vmov.f64 d19, d0
1111 0xeeb00b44, //vmov.f64 d0, d4
1112 0xeeb01b45, //vmov.f64 d1, d5
1113 0xeeb02b46, //vmov.f64 d2, d6
1114 0xeeb03b47, //vmov.f64 d3, d7
1115 0xeeb04b63, //vmov.f64 d4, d19
1116 0xeeb05b62, //vmov.f64 d5, d18
1117 0xeeb06b61, //vmov.f64 d6, d17
1118 0xeeb07b60, //vmov.f64 d7, d16
1119 0xe12fff13, //bx r3
1120};
1121
1122CODE const uint32_t sk_move_src_dst_vfp4[] = {
1123 0xeeb04b40, //vmov.f64 d4, d0
1124 0xe4913004, //ldr r3, [r1], #4
1125 0xeeb05b41, //vmov.f64 d5, d1
1126 0xeeb06b42, //vmov.f64 d6, d2
1127 0xeeb07b43, //vmov.f64 d7, d3
1128 0xe12fff13, //bx r3
1129};
1130
1131CODE const uint32_t sk_move_dst_src_vfp4[] = {
1132 0xeeb00b44, //vmov.f64 d0, d4
1133 0xe4913004, //ldr r3, [r1], #4
1134 0xeeb01b45, //vmov.f64 d1, d5
1135 0xeeb02b46, //vmov.f64 d2, d6
1136 0xeeb03b47, //vmov.f64 d3, d7
1137 0xe12fff13, //bx r3
1138};
1139
1140CODE const uint32_t sk_premul_vfp4[] = {
1141 0xf3000d13, //vmul.f32 d0, d0, d3
1142 0xe4913004, //ldr r3, [r1], #4
1143 0xf3011d13, //vmul.f32 d1, d1, d3
1144 0xf3022d13, //vmul.f32 d2, d2, d3
1145 0xe12fff13, //bx r3
1146};
1147
1148CODE const uint32_t sk_unpremul_vfp4[] = {
1149 0xed2d8b04, //vpush {d8-d9}
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001150 0xeeb78a00, //vmov.f32 s16, #112
Mike Klein894d5612017-03-07 07:59:52 -05001151 0xf3f91503, //vceq.f32 d17, d3, #0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001152 0xf2c00010, //vmov.i32 d16, #0
Mike Klein894d5612017-03-07 07:59:52 -05001153 0xe4913004, //ldr r3, [r1], #4
1154 0xeec89a23, //vdiv.f32 s19, s16, s7
1155 0xee889a03, //vdiv.f32 s18, s16, s6
1156 0xf3501199, //vbsl d17, d16, d9
1157 0xf3010d90, //vmul.f32 d0, d17, d0
1158 0xf3011d91, //vmul.f32 d1, d17, d1
1159 0xf3012d92, //vmul.f32 d2, d17, d2
1160 0xecbd8b04, //vpop {d8-d9}
1161 0xe12fff13, //bx r3
1162};
1163
1164CODE const uint32_t sk_from_srgb_vfp4[] = {
1165 0xed2d8b02, //vpush {d8}
1166 0xe282303c, //add r3, r2, #60
1167 0xed928a10, //vldr s16, [r2, #64]
1168 0xf3402d10, //vmul.f32 d18, d0, d0
1169 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1170 0xe2823038, //add r3, r2, #56
1171 0xf3413d11, //vmul.f32 d19, d1, d1
1172 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1173 0xe2823044, //add r3, r2, #68
1174 0xf26141b1, //vorr d20, d17, d17
1175 0xf26171b1, //vorr d23, d17, d17
1176 0xf4e38c9f, //vld1.32 {d24[]}, [r3 :32]
1177 0xf2404c30, //vfma.f32 d20, d0, d16
1178 0xe2823034, //add r3, r2, #52
1179 0xf2417c30, //vfma.f32 d23, d1, d16
1180 0xf2421c30, //vfma.f32 d17, d2, d16
1181 0xf3425d12, //vmul.f32 d21, d2, d2
1182 0xf2e16948, //vmul.f32 d22, d1, d8[0]
1183 0xf2e00948, //vmul.f32 d16, d0, d8[0]
1184 0xf2e29948, //vmul.f32 d25, d2, d8[0]
1185 0xf3282e82, //vcgt.f32 d2, d24, d2
1186 0xf3281e81, //vcgt.f32 d1, d24, d1
1187 0xf3280e80, //vcgt.f32 d0, d24, d0
1188 0xf4e38c9f, //vld1.32 {d24[]}, [r3 :32]
1189 0xf268a1b8, //vorr d26, d24, d24
1190 0xf242acb4, //vfma.f32 d26, d18, d20
1191 0xf26821b8, //vorr d18, d24, d24
1192 0xe4913004, //ldr r3, [r1], #4
1193 0xf2432cb7, //vfma.f32 d18, d19, d23
1194 0xf2458cb1, //vfma.f32 d24, d21, d17
1195 0xf31001ba, //vbsl d0, d16, d26
1196 0xf31611b2, //vbsl d1, d22, d18
1197 0xf31921b8, //vbsl d2, d25, d24
1198 0xecbd8b02, //vpop {d8}
1199 0xe12fff13, //bx r3
1200};
1201
1202CODE const uint32_t sk_to_srgb_vfp4[] = {
1203 0xed2d8b02, //vpush {d8}
1204 0xf3fb0580, //vrsqrte.f32 d16, d0
1205 0xe2823050, //add r3, r2, #80
1206 0xf3fb1581, //vrsqrte.f32 d17, d1
1207 0xed928a12, //vldr s16, [r2, #72]
1208 0xf3fb2582, //vrsqrte.f32 d18, d2
1209 0xf3403db0, //vmul.f32 d19, d16, d16
1210 0xf3414db1, //vmul.f32 d20, d17, d17
1211 0xf3425db2, //vmul.f32 d21, d18, d18
1212 0xf2603f33, //vrsqrts.f32 d19, d0, d19
1213 0xf2614f34, //vrsqrts.f32 d20, d1, d20
1214 0xf2625f35, //vrsqrts.f32 d21, d2, d21
1215 0xf3400db3, //vmul.f32 d16, d16, d19
1216 0xf3411db4, //vmul.f32 d17, d17, d20
1217 0xf3422db5, //vmul.f32 d18, d18, d21
1218 0xf3fb3520, //vrecpe.f32 d19, d16
1219 0xf3fb4521, //vrecpe.f32 d20, d17
1220 0xf3fb6522, //vrecpe.f32 d22, d18
1221 0xf3fb55a2, //vrsqrte.f32 d21, d18
1222 0xf3fb75a0, //vrsqrte.f32 d23, d16
1223 0xf3fb85a1, //vrsqrte.f32 d24, d17
1224 0xf2409fb3, //vrecps.f32 d25, d16, d19
1225 0xf241afb4, //vrecps.f32 d26, d17, d20
1226 0xf242bfb6, //vrecps.f32 d27, d18, d22
1227 0xf345cdb5, //vmul.f32 d28, d21, d21
1228 0xf347ddb7, //vmul.f32 d29, d23, d23
1229 0xf348edb8, //vmul.f32 d30, d24, d24
1230 0xf2622fbc, //vrsqrts.f32 d18, d18, d28
1231 0xf2600fbd, //vrsqrts.f32 d16, d16, d29
1232 0xf2611fbe, //vrsqrts.f32 d17, d17, d30
1233 0xf3433db9, //vmul.f32 d19, d19, d25
1234 0xf4e39c9f, //vld1.32 {d25[]}, [r3 :32]
1235 0xe2823054, //add r3, r2, #84
1236 0xf3444dba, //vmul.f32 d20, d20, d26
1237 0xf3466dbb, //vmul.f32 d22, d22, d27
1238 0xf4e3ac9f, //vld1.32 {d26[]}, [r3 :32]
1239 0xe282304c, //add r3, r2, #76
1240 0xf26ab1ba, //vorr d27, d26, d26
1241 0xf249bcb3, //vfma.f32 d27, d25, d19
1242 0xf26a31ba, //vorr d19, d26, d26
1243 0xf2493cb4, //vfma.f32 d19, d25, d20
1244 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1245 0xf249acb6, //vfma.f32 d26, d25, d22
1246 0xe2823058, //add r3, r2, #88
1247 0xf3452db2, //vmul.f32 d18, d21, d18
1248 0xf3470db0, //vmul.f32 d16, d23, d16
1249 0xf3481db1, //vmul.f32 d17, d24, d17
1250 0xf2e05948, //vmul.f32 d21, d0, d8[0]
1251 0xf244bcb0, //vfma.f32 d27, d20, d16
1252 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1253 0xf2443cb1, //vfma.f32 d19, d20, d17
1254 0xf244acb2, //vfma.f32 d26, d20, d18
1255 0xf4e24c9f, //vld1.32 {d20[]}, [r2 :32]
1256 0xf2e11948, //vmul.f32 d17, d1, d8[0]
1257 0xf2e22948, //vmul.f32 d18, d2, d8[0]
1258 0xf3201e81, //vcgt.f32 d1, d16, d1
1259 0xe4913004, //ldr r3, [r1], #4
1260 0xf3200e80, //vcgt.f32 d0, d16, d0
1261 0xf3202e82, //vcgt.f32 d2, d16, d2
1262 0xf2640fab, //vmin.f32 d16, d20, d27
1263 0xf2643fa3, //vmin.f32 d19, d20, d19
1264 0xf2644faa, //vmin.f32 d20, d20, d26
1265 0xf31501b0, //vbsl d0, d21, d16
1266 0xf31111b3, //vbsl d1, d17, d19
1267 0xf31221b4, //vbsl d2, d18, d20
1268 0xecbd8b02, //vpop {d8}
1269 0xe12fff13, //bx r3
1270};
1271
1272CODE const uint32_t sk_scale_1_float_vfp4[] = {
1273 0xed2d8b02, //vpush {d8}
1274 0xe8911008, //ldm r1, {r3, ip}
1275 0xe2811008, //add r1, r1, #8
1276 0xed938a00, //vldr s16, [r3]
1277 0xf2a00948, //vmul.f32 d0, d0, d8[0]
1278 0xf2a11948, //vmul.f32 d1, d1, d8[0]
1279 0xf2a22948, //vmul.f32 d2, d2, d8[0]
1280 0xf2a33948, //vmul.f32 d3, d3, d8[0]
1281 0xecbd8b02, //vpop {d8}
1282 0xe12fff1c, //bx ip
1283};
1284
1285CODE const uint32_t sk_scale_u8_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001286 0xe24dd004, //sub sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001287 0xe8911008, //ldm r1, {r3, ip}
1288 0xe2811008, //add r1, r1, #8
1289 0xe5933000, //ldr r3, [r3]
1290 0xe0833000, //add r3, r3, r0
1291 0xe1d330b0, //ldrh r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001292 0xe1cd30b0, //strh r3, [sp]
1293 0xe1a0300d, //mov r3, sp
Mike Klein894d5612017-03-07 07:59:52 -05001294 0xf4e3041f, //vld1.16 {d16[0]}, [r3 :16]
1295 0xf3c80a30, //vmovl.u8 q8, d16
1296 0xf3d00a30, //vmovl.u16 q8, d16
1297 0xf3fb06a0, //vcvt.f32.u32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001298 0xeddf1b06, //vldr d17, [pc, #24]
1299 0xf3400db1, //vmul.f32 d16, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001300 0xf3000d90, //vmul.f32 d0, d16, d0
1301 0xf3001d91, //vmul.f32 d1, d16, d1
1302 0xf3002d92, //vmul.f32 d2, d16, d2
1303 0xf3003d93, //vmul.f32 d3, d16, d3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001304 0xe28dd004, //add sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001305 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001306 0x3b808081, //.word 0x3b808081
1307 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001308};
1309
1310CODE const uint32_t sk_lerp_1_float_vfp4[] = {
1311 0xe8911008, //ldm r1, {r3, ip}
1312 0xf2600d04, //vsub.f32 d16, d0, d4
1313 0xf2611d05, //vsub.f32 d17, d1, d5
1314 0xf2622d06, //vsub.f32 d18, d2, d6
1315 0xe2811008, //add r1, r1, #8
1316 0xf2633d07, //vsub.f32 d19, d3, d7
1317 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1318 0xf2240114, //vorr d0, d4, d4
1319 0xf2251115, //vorr d1, d5, d5
1320 0xf2262116, //vorr d2, d6, d6
1321 0xf2273117, //vorr d3, d7, d7
1322 0xf2000cb4, //vfma.f32 d0, d16, d20
1323 0xf2011cb4, //vfma.f32 d1, d17, d20
1324 0xf2022cb4, //vfma.f32 d2, d18, d20
1325 0xf2033cb4, //vfma.f32 d3, d19, d20
1326 0xe12fff1c, //bx ip
1327};
1328
1329CODE const uint32_t sk_lerp_u8_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001330 0xe24dd004, //sub sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001331 0xe8911008, //ldm r1, {r3, ip}
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001332 0xf2602d04, //vsub.f32 d18, d0, d4
Mike Klein894d5612017-03-07 07:59:52 -05001333 0xf2623d06, //vsub.f32 d19, d2, d6
1334 0xf2634d07, //vsub.f32 d20, d3, d7
1335 0xe2811008, //add r1, r1, #8
1336 0xe5933000, //ldr r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001337 0xf2240114, //vorr d0, d4, d4
Mike Klein894d5612017-03-07 07:59:52 -05001338 0xf2262116, //vorr d2, d6, d6
1339 0xe0833000, //add r3, r3, r0
1340 0xf2273117, //vorr d3, d7, d7
1341 0xe1d330b0, //ldrh r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001342 0xe1cd30b0, //strh r3, [sp]
1343 0xe1a0300d, //mov r3, sp
Mike Klein894d5612017-03-07 07:59:52 -05001344 0xf4e3041f, //vld1.16 {d16[0]}, [r3 :16]
1345 0xf3c80a30, //vmovl.u8 q8, d16
1346 0xf3d00a30, //vmovl.u16 q8, d16
1347 0xf3fb06a0, //vcvt.f32.u32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001348 0xeddf1b08, //vldr d17, [pc, #32]
1349 0xf3400db1, //vmul.f32 d16, d16, d17
1350 0xf2611d05, //vsub.f32 d17, d1, d5
1351 0xf2251115, //vorr d1, d5, d5
1352 0xf2020cb0, //vfma.f32 d0, d18, d16
1353 0xf2011cb0, //vfma.f32 d1, d17, d16
Mike Klein894d5612017-03-07 07:59:52 -05001354 0xf2032cb0, //vfma.f32 d2, d19, d16
1355 0xf2043cb0, //vfma.f32 d3, d20, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001356 0xe28dd004, //add sp, sp, #4
Mike Klein894d5612017-03-07 07:59:52 -05001357 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001358 0x3b808081, //.word 0x3b808081
1359 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001360};
1361
1362CODE const uint32_t sk_lerp_565_vfp4[] = {
1363 0xed2d8b04, //vpush {d8-d9}
1364 0xe24dd008, //sub sp, sp, #8
1365 0xe8911008, //ldm r1, {r3, ip}
1366 0xf2603d04, //vsub.f32 d19, d0, d4
1367 0xf2240114, //vorr d0, d4, d4
1368 0xe2811008, //add r1, r1, #8
1369 0xe5933000, //ldr r3, [r3]
1370 0xe7933080, //ldr r3, [r3, r0, lsl #1]
1371 0xe58d3004, //str r3, [sp, #4]
1372 0xe28d3004, //add r3, sp, #4
1373 0xed923a1d, //vldr s6, [r2, #116]
1374 0xf4e3083f, //vld1.32 {d16[0]}, [r3 :32]
1375 0xe282306c, //add r3, r2, #108
1376 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1377 0xe2823068, //add r3, r2, #104
1378 0xf3d04a30, //vmovl.u16 q10, d16
1379 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1380 0xe2823070, //add r3, r2, #112
1381 0xf24201b4, //vand d16, d18, d20
1382 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1383 0xf24221b4, //vand d18, d18, d20
1384 0xf24111b4, //vand d17, d17, d20
1385 0xf3fb0620, //vcvt.f32.s32 d16, d16
1386 0xed928a1e, //vldr s16, [r2, #120]
1387 0xf3fb1621, //vcvt.f32.s32 d17, d17
1388 0xed929a1f, //vldr s18, [r2, #124]
1389 0xf3fb2622, //vcvt.f32.s32 d18, d18
1390 0xf2614d05, //vsub.f32 d20, d1, d5
1391 0xf2e009c3, //vmul.f32 d16, d16, d3[0]
Mike Klein894d5612017-03-07 07:59:52 -05001392 0xf2625d06, //vsub.f32 d21, d2, d6
1393 0xf2e119c8, //vmul.f32 d17, d17, d8[0]
1394 0xf2e229c9, //vmul.f32 d18, d18, d9[0]
1395 0xf2251115, //vorr d1, d5, d5
1396 0xf2262116, //vorr d2, d6, d6
1397 0xf2030cb0, //vfma.f32 d0, d19, d16
1398 0xf2041cb1, //vfma.f32 d1, d20, d17
1399 0xf2052cb2, //vfma.f32 d2, d21, d18
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001400 0xf2873f10, //vmov.f32 d3, #1
Mike Klein894d5612017-03-07 07:59:52 -05001401 0xe28dd008, //add sp, sp, #8
1402 0xecbd8b04, //vpop {d8-d9}
1403 0xe12fff1c, //bx ip
1404};
1405
1406CODE const uint32_t sk_load_tables_vfp4[] = {
1407 0xe92d48f0, //push {r4, r5, r6, r7, fp, lr}
1408 0xe8911008, //ldm r1, {r3, ip}
1409 0xe2826010, //add r6, r2, #16
1410 0xe2811008, //add r1, r1, #8
1411 0xe593e000, //ldr lr, [r3]
1412 0xe99300b0, //ldmib r3, {r4, r5, r7}
1413 0xf4e60c9f, //vld1.32 {d16[]}, [r6 :32]
1414 0xe08e6100, //add r6, lr, r0, lsl #2
1415 0xedd61b00, //vldr d17, [r6]
1416 0xf24021b1, //vand d18, d16, d17
1417 0xed922a03, //vldr s4, [r2, #12]
1418 0xf3f03031, //vshr.u32 d19, d17, #16
1419 0xee326b90, //vmov.32 r6, d18[1]
1420 0xe0846106, //add r6, r4, r6, lsl #2
1421 0xedd60a00, //vldr s1, [r6]
1422 0xee126b90, //vmov.32 r6, d18[0]
1423 0xf3f82031, //vshr.u32 d18, d17, #8
1424 0xf24021b2, //vand d18, d16, d18
1425 0xf24001b3, //vand d16, d16, d19
1426 0xee103b90, //vmov.32 r3, d16[0]
1427 0xe0846106, //add r6, r4, r6, lsl #2
1428 0xee304b90, //vmov.32 r4, d16[1]
1429 0xf3e80031, //vshr.u32 d16, d17, #24
1430 0xed960a00, //vldr s0, [r6]
1431 0xee326b90, //vmov.32 r6, d18[1]
1432 0xf3fb0620, //vcvt.f32.s32 d16, d16
1433 0xe0873103, //add r3, r7, r3, lsl #2
1434 0xf2a039c2, //vmul.f32 d3, d16, d2[0]
1435 0xe0874104, //add r4, r7, r4, lsl #2
1436 0xedd42a00, //vldr s5, [r4]
1437 0xe0856106, //add r6, r5, r6, lsl #2
1438 0xed932a00, //vldr s4, [r3]
1439 0xedd61a00, //vldr s3, [r6]
1440 0xee126b90, //vmov.32 r6, d18[0]
1441 0xe0856106, //add r6, r5, r6, lsl #2
1442 0xed961a00, //vldr s2, [r6]
1443 0xe8bd48f0, //pop {r4, r5, r6, r7, fp, lr}
1444 0xe12fff1c, //bx ip
1445};
1446
1447CODE const uint32_t sk_load_a8_vfp4[] = {
1448 0xe24dd004, //sub sp, sp, #4
1449 0xe8911008, //ldm r1, {r3, ip}
1450 0xe2811008, //add r1, r1, #8
1451 0xf2801010, //vmov.i32 d1, #0
1452 0xf2802010, //vmov.i32 d2, #0
1453 0xe5933000, //ldr r3, [r3]
1454 0xe0833000, //add r3, r3, r0
1455 0xe1d330b0, //ldrh r3, [r3]
1456 0xe1cd30b0, //strh r3, [sp]
1457 0xe1a0300d, //mov r3, sp
1458 0xf4e3041f, //vld1.16 {d16[0]}, [r3 :16]
1459 0xed920a03, //vldr s0, [r2, #12]
1460 0xf3c80a30, //vmovl.u8 q8, d16
1461 0xf3d00a30, //vmovl.u16 q8, d16
1462 0xf3fb06a0, //vcvt.f32.u32 d16, d16
1463 0xf2a039c0, //vmul.f32 d3, d16, d0[0]
1464 0xf2800010, //vmov.i32 d0, #0
1465 0xe28dd004, //add sp, sp, #4
1466 0xe12fff1c, //bx ip
1467};
1468
1469CODE const uint32_t sk_store_a8_vfp4[] = {
1470 0xe92d4800, //push {fp, lr}
1471 0xe2823008, //add r3, r2, #8
1472 0xf2c3061f, //vmov.i32 d16, #1056964608
1473 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1474 0xe5913000, //ldr r3, [r1]
1475 0xf2430c31, //vfma.f32 d16, d3, d17
1476 0xe5933000, //ldr r3, [r3]
1477 0xf3fb07a0, //vcvt.u32.f32 d16, d16
1478 0xee10eb90, //vmov.32 lr, d16[0]
1479 0xee30cb90, //vmov.32 ip, d16[1]
1480 0xe7e3e000, //strb lr, [r3, r0]!
1481 0xe5c3c001, //strb ip, [r3, #1]
1482 0xe5913004, //ldr r3, [r1, #4]
1483 0xe2811008, //add r1, r1, #8
1484 0xe8bd4800, //pop {fp, lr}
1485 0xe12fff13, //bx r3
1486};
1487
1488CODE const uint32_t sk_load_565_vfp4[] = {
1489 0xe24dd004, //sub sp, sp, #4
1490 0xe8911008, //ldm r1, {r3, ip}
1491 0xe2811008, //add r1, r1, #8
1492 0xe5933000, //ldr r3, [r3]
1493 0xe7933080, //ldr r3, [r3, r0, lsl #1]
1494 0xe58d3000, //str r3, [sp]
1495 0xe1a0300d, //mov r3, sp
1496 0xf4e3083f, //vld1.32 {d16[0]}, [r3 :32]
1497 0xe282306c, //add r3, r2, #108
1498 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1499 0xe2823068, //add r3, r2, #104
1500 0xf3d04a30, //vmovl.u16 q10, d16
1501 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1502 0xe2823070, //add r3, r2, #112
1503 0xf24201b4, //vand d16, d18, d20
1504 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1505 0xf24111b4, //vand d17, d17, d20
1506 0xf24221b4, //vand d18, d18, d20
1507 0xf4a23c9f, //vld1.32 {d3[]}, [r2 :32]
1508 0xf3fb0620, //vcvt.f32.s32 d16, d16
1509 0xf3fb1621, //vcvt.f32.s32 d17, d17
1510 0xf3fb2622, //vcvt.f32.s32 d18, d18
1511 0xed920a1d, //vldr s0, [r2, #116]
1512 0xed921a1e, //vldr s2, [r2, #120]
1513 0xed922a1f, //vldr s4, [r2, #124]
1514 0xf2a009c0, //vmul.f32 d0, d16, d0[0]
1515 0xf2a119c1, //vmul.f32 d1, d17, d1[0]
1516 0xf2a229c2, //vmul.f32 d2, d18, d2[0]
1517 0xe28dd004, //add sp, sp, #4
1518 0xe12fff1c, //bx ip
1519};
1520
1521CODE const uint32_t sk_store_565_vfp4[] = {
1522 0xe2823080, //add r3, r2, #128
1523 0xf2c3361f, //vmov.i32 d19, #1056964608
1524 0xf2c3461f, //vmov.i32 d20, #1056964608
1525 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1526 0xe2823084, //add r3, r2, #132
1527 0xf2403c31, //vfma.f32 d19, d0, d17
1528 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1529 0xf2c3061f, //vmov.i32 d16, #1056964608
1530 0xf2414c32, //vfma.f32 d20, d1, d18
1531 0xf2420c31, //vfma.f32 d16, d2, d17
1532 0xe5913000, //ldr r3, [r1]
1533 0xe5933000, //ldr r3, [r3]
1534 0xf3fb17a3, //vcvt.u32.f32 d17, d19
1535 0xe0833080, //add r3, r3, r0, lsl #1
1536 0xf3fb27a4, //vcvt.u32.f32 d18, d20
1537 0xf3fb07a0, //vcvt.u32.f32 d16, d16
1538 0xf2eb1531, //vshl.s32 d17, d17, #11
1539 0xf2e52532, //vshl.s32 d18, d18, #5
1540 0xf26101b0, //vorr d16, d17, d16
1541 0xf26001b2, //vorr d16, d16, d18
1542 0xf3f60121, //vuzp.16 d16, d17
1543 0xf4c3080f, //vst1.32 {d16[0]}, [r3]
1544 0xe5913004, //ldr r3, [r1, #4]
1545 0xe2811008, //add r1, r1, #8
1546 0xe12fff13, //bx r3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001547 0xe320f000, //nop {0}
Mike Klein894d5612017-03-07 07:59:52 -05001548};
1549
1550CODE const uint32_t sk_load_8888_vfp4[] = {
Mike Klein894d5612017-03-07 07:59:52 -05001551 0xe8911008, //ldm r1, {r3, ip}
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001552 0xf3c7001f, //vmov.i32 d16, #255
Mike Klein894d5612017-03-07 07:59:52 -05001553 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001554 0xe5933000, //ldr r3, [r3]
1555 0xe0833100, //add r3, r3, r0, lsl #2
Mike Klein894d5612017-03-07 07:59:52 -05001556 0xedd31b00, //vldr d17, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001557 0xf24121b0, //vand d18, d17, d16
Mike Klein894d5612017-03-07 07:59:52 -05001558 0xf3f83031, //vshr.u32 d19, d17, #8
1559 0xf3e84031, //vshr.u32 d20, d17, #24
1560 0xf3f01031, //vshr.u32 d17, d17, #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001561 0xf24331b0, //vand d19, d19, d16
1562 0xf24101b0, //vand d16, d17, d16
1563 0xeddf1b08, //vldr d17, [pc, #32]
Mike Klein894d5612017-03-07 07:59:52 -05001564 0xf3fb2622, //vcvt.f32.s32 d18, d18
1565 0xf3fb4624, //vcvt.f32.s32 d20, d20
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001566 0xf3fb3623, //vcvt.f32.s32 d19, d19
Mike Klein894d5612017-03-07 07:59:52 -05001567 0xf3fb0620, //vcvt.f32.s32 d16, d16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001568 0xf3020db1, //vmul.f32 d0, d18, d17
1569 0xf3043db1, //vmul.f32 d3, d20, d17
1570 0xf3031db1, //vmul.f32 d1, d19, d17
1571 0xf3002db1, //vmul.f32 d2, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001572 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001573 0x3b808081, //.word 0x3b808081
1574 0x3b808081, //.word 0x3b808081
Mike Klein894d5612017-03-07 07:59:52 -05001575};
1576
1577CODE const uint32_t sk_store_8888_vfp4[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001578 0xeddf0b1a, //vldr d16, [pc, #104]
Mike Klein894d5612017-03-07 07:59:52 -05001579 0xf2c3261f, //vmov.i32 d18, #1056964608
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001580 0xf2412c30, //vfma.f32 d18, d1, d16
Mike Klein894d5612017-03-07 07:59:52 -05001581 0xe5913000, //ldr r3, [r1]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001582 0xf2c3361f, //vmov.i32 d19, #1056964608
1583 0xf2c3161f, //vmov.i32 d17, #1056964608
1584 0xf2423c30, //vfma.f32 d19, d2, d16
Mike Klein894d5612017-03-07 07:59:52 -05001585 0xe5933000, //ldr r3, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001586 0xf2c3461f, //vmov.i32 d20, #1056964608
1587 0xf2401c30, //vfma.f32 d17, d0, d16
Mike Klein894d5612017-03-07 07:59:52 -05001588 0xe0833100, //add r3, r3, r0, lsl #2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001589 0xf2434c30, //vfma.f32 d20, d3, d16
1590 0xf3fb07a2, //vcvt.u32.f32 d16, d18
Mike Klein894d5612017-03-07 07:59:52 -05001591 0xf3fb27a3, //vcvt.u32.f32 d18, d19
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001592 0xf3fb17a1, //vcvt.u32.f32 d17, d17
Mike Klein894d5612017-03-07 07:59:52 -05001593 0xf3fb37a4, //vcvt.u32.f32 d19, d20
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001594 0xf2e80530, //vshl.s32 d16, d16, #8
Mike Klein894d5612017-03-07 07:59:52 -05001595 0xf2f02532, //vshl.s32 d18, d18, #16
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001596 0xf26001b1, //vorr d16, d16, d17
Mike Klein894d5612017-03-07 07:59:52 -05001597 0xf2f81533, //vshl.s32 d17, d19, #24
1598 0xf26001b2, //vorr d16, d16, d18
1599 0xf26001b1, //vorr d16, d16, d17
1600 0xedc30b00, //vstr d16, [r3]
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001601 0xe2813008, //add r3, r1, #8
1602 0xe591c004, //ldr ip, [r1, #4]
1603 0xe1a01003, //mov r1, r3
1604 0xe12fff1c, //bx ip
1605 0xe320f000, //nop {0}
1606 0x437f0000, //.word 0x437f0000
1607 0x437f0000, //.word 0x437f0000
Mike Klein894d5612017-03-07 07:59:52 -05001608};
1609
1610CODE const uint32_t sk_load_f16_vfp4[] = {
1611 0xed2d8b04, //vpush {d8-d9}
1612 0xe8911008, //ldm r1, {r3, ip}
1613 0xe2811008, //add r1, r1, #8
1614 0xe5933000, //ldr r3, [r3]
1615 0xe0833180, //add r3, r3, r0, lsl #3
1616 0xf463084f, //vld2.16 {d16-d17}, [r3]
1617 0xf3b62720, //vcvt.f32.f16 q1, d16
1618 0xf3b68721, //vcvt.f32.f16 q4, d17
1619 0xf2220112, //vorr d0, d2, d2
1620 0xeef00a43, //vmov.f32 s1, s6
1621 0xf2281118, //vorr d1, d8, d8
1622 0xeeb03a62, //vmov.f32 s6, s5
1623 0xeef01a49, //vmov.f32 s3, s18
1624 0xeeb09a68, //vmov.f32 s18, s17
1625 0xeeb02b43, //vmov.f64 d2, d3
1626 0xeeb03b49, //vmov.f64 d3, d9
1627 0xecbd8b04, //vpop {d8-d9}
1628 0xe12fff1c, //bx ip
1629};
1630
1631CODE const uint32_t sk_store_f16_vfp4[] = {
1632 0xeef00b41, //vmov.f64 d16, d1
1633 0xeef03b42, //vmov.f64 d19, d2
1634 0xf2631113, //vorr d17, d3, d3
1635 0xf2602110, //vorr d18, d0, d0
1636 0xf3fa00a1, //vtrn.32 d16, d17
1637 0xf3f61620, //vcvt.f16.f32 d17, q8
1638 0xf3fa20a3, //vtrn.32 d18, d19
1639 0xe5913000, //ldr r3, [r1]
1640 0xf3f60622, //vcvt.f16.f32 d16, q9
1641 0xe5933000, //ldr r3, [r3]
1642 0xe0833180, //add r3, r3, r0, lsl #3
1643 0xf443084f, //vst2.16 {d16-d17}, [r3]
1644 0xe2813008, //add r3, r1, #8
1645 0xe591c004, //ldr ip, [r1, #4]
1646 0xe1a01003, //mov r1, r3
1647 0xe12fff1c, //bx ip
1648};
1649
1650CODE const uint32_t sk_store_f32_vfp4[] = {
1651 0xe5913000, //ldr r3, [r1]
1652 0xe5933000, //ldr r3, [r3]
1653 0xe0833200, //add r3, r3, r0, lsl #4
1654 0xf403008f, //vst4.32 {d0-d3}, [r3]
1655 0xe2813008, //add r3, r1, #8
1656 0xe591c004, //ldr ip, [r1, #4]
1657 0xe1a01003, //mov r1, r3
1658 0xe12fff1c, //bx ip
1659};
1660
1661CODE const uint32_t sk_clamp_x_vfp4[] = {
1662 0xe8911008, //ldm r1, {r3, ip}
1663 0xf2c00010, //vmov.i32 d16, #0
1664 0xf3c71e1f, //vmov.i8 d17, #255
1665 0xf2400f80, //vmax.f32 d16, d16, d0
1666 0xe2811008, //add r1, r1, #8
1667 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1668 0xf26218a1, //vadd.i32 d17, d18, d17
1669 0xf2200fa1, //vmin.f32 d0, d16, d17
1670 0xe12fff1c, //bx ip
1671};
1672
1673CODE const uint32_t sk_clamp_y_vfp4[] = {
1674 0xe8911008, //ldm r1, {r3, ip}
1675 0xf2c00010, //vmov.i32 d16, #0
1676 0xf3c71e1f, //vmov.i8 d17, #255
1677 0xf2400f81, //vmax.f32 d16, d16, d1
1678 0xe2811008, //add r1, r1, #8
1679 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1680 0xf26218a1, //vadd.i32 d17, d18, d17
1681 0xf2201fa1, //vmin.f32 d1, d16, d17
1682 0xe12fff1c, //bx ip
1683};
1684
1685CODE const uint32_t sk_repeat_x_vfp4[] = {
1686 0xed2d8b04, //vpush {d8-d9}
1687 0xe8911008, //ldm r1, {r3, ip}
1688 0xf2c02010, //vmov.i32 d18, #0
Mike Klein894d5612017-03-07 07:59:52 -05001689 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001690 0xeddf3b10, //vldr d19, [pc, #64]
Mike Klein894d5612017-03-07 07:59:52 -05001691 0xed938a00, //vldr s16, [r3]
1692 0xeec09a88, //vdiv.f32 s19, s1, s16
1693 0xee809a08, //vdiv.f32 s18, s0, s16
1694 0xf3fb0709, //vcvt.s32.f32 d16, d9
1695 0xf3fb0620, //vcvt.f32.s32 d16, d16
1696 0xf3601e89, //vcgt.f32 d17, d16, d9
1697 0xf35311b2, //vbsl d17, d19, d18
1698 0xf3f42c08, //vdup.32 d18, d8[0]
1699 0xf2600da1, //vsub.f32 d16, d16, d17
1700 0xf3c71e1f, //vmov.i8 d17, #255
1701 0xf26218a1, //vadd.i32 d17, d18, d17
1702 0xf2e009c8, //vmul.f32 d16, d16, d8[0]
1703 0xf2600d20, //vsub.f32 d16, d0, d16
1704 0xf2200fa1, //vmin.f32 d0, d16, d17
1705 0xecbd8b04, //vpop {d8-d9}
1706 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001707 0xe320f000, //nop {0}
1708 0x3f800000, //.word 0x3f800000
1709 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001710};
1711
1712CODE const uint32_t sk_repeat_y_vfp4[] = {
1713 0xed2d8b04, //vpush {d8-d9}
1714 0xe8911008, //ldm r1, {r3, ip}
1715 0xf2c02010, //vmov.i32 d18, #0
Mike Klein894d5612017-03-07 07:59:52 -05001716 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001717 0xeddf3b10, //vldr d19, [pc, #64]
Mike Klein894d5612017-03-07 07:59:52 -05001718 0xed938a00, //vldr s16, [r3]
1719 0xeec19a88, //vdiv.f32 s19, s3, s16
1720 0xee819a08, //vdiv.f32 s18, s2, s16
1721 0xf3fb0709, //vcvt.s32.f32 d16, d9
1722 0xf3fb0620, //vcvt.f32.s32 d16, d16
1723 0xf3601e89, //vcgt.f32 d17, d16, d9
1724 0xf35311b2, //vbsl d17, d19, d18
1725 0xf3f42c08, //vdup.32 d18, d8[0]
1726 0xf2600da1, //vsub.f32 d16, d16, d17
1727 0xf3c71e1f, //vmov.i8 d17, #255
1728 0xf26218a1, //vadd.i32 d17, d18, d17
1729 0xf2e009c8, //vmul.f32 d16, d16, d8[0]
1730 0xf2610d20, //vsub.f32 d16, d1, d16
1731 0xf2201fa1, //vmin.f32 d1, d16, d17
1732 0xecbd8b04, //vpop {d8-d9}
1733 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001734 0xe320f000, //nop {0}
1735 0x3f800000, //.word 0x3f800000
1736 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001737};
1738
1739CODE const uint32_t sk_mirror_x_vfp4[] = {
1740 0xed2d8b04, //vpush {d8-d9}
1741 0xe8911008, //ldm r1, {r3, ip}
1742 0xf2c03010, //vmov.i32 d19, #0
Mike Klein894d5612017-03-07 07:59:52 -05001743 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001744 0xeddf4b14, //vldr d20, [pc, #80]
Mike Klein894d5612017-03-07 07:59:52 -05001745 0xed938a00, //vldr s16, [r3]
1746 0xee389a08, //vadd.f32 s18, s16, s16
1747 0xf3f40c08, //vdup.32 d16, d8[0]
1748 0xf2200d20, //vsub.f32 d0, d0, d16
1749 0xeec08a89, //vdiv.f32 s17, s1, s18
1750 0xee808a09, //vdiv.f32 s16, s0, s18
1751 0xf3fb1708, //vcvt.s32.f32 d17, d8
1752 0xf3fb1621, //vcvt.f32.s32 d17, d17
1753 0xf3612e88, //vcgt.f32 d18, d17, d8
1754 0xf35421b3, //vbsl d18, d20, d19
1755 0xf2611da2, //vsub.f32 d17, d17, d18
1756 0xf3c72e1f, //vmov.i8 d18, #255
1757 0xf2e119c9, //vmul.f32 d17, d17, d9[0]
1758 0xf2601d21, //vsub.f32 d17, d0, d17
1759 0xf2611da0, //vsub.f32 d17, d17, d16
1760 0xf26008a2, //vadd.i32 d16, d16, d18
1761 0xf3f91721, //vabs.f32 d17, d17
1762 0xf2210fa0, //vmin.f32 d0, d17, d16
1763 0xecbd8b04, //vpop {d8-d9}
1764 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001765 0xe320f000, //nop {0}
1766 0x3f800000, //.word 0x3f800000
1767 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001768};
1769
1770CODE const uint32_t sk_mirror_y_vfp4[] = {
1771 0xed2d8b04, //vpush {d8-d9}
1772 0xe8911008, //ldm r1, {r3, ip}
1773 0xf2c03010, //vmov.i32 d19, #0
Mike Klein894d5612017-03-07 07:59:52 -05001774 0xe2811008, //add r1, r1, #8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001775 0xeddf4b14, //vldr d20, [pc, #80]
Mike Klein894d5612017-03-07 07:59:52 -05001776 0xed938a00, //vldr s16, [r3]
1777 0xee389a08, //vadd.f32 s18, s16, s16
1778 0xf3f40c08, //vdup.32 d16, d8[0]
1779 0xf2211d20, //vsub.f32 d1, d1, d16
1780 0xeec18a89, //vdiv.f32 s17, s3, s18
1781 0xee818a09, //vdiv.f32 s16, s2, s18
1782 0xf3fb1708, //vcvt.s32.f32 d17, d8
1783 0xf3fb1621, //vcvt.f32.s32 d17, d17
1784 0xf3612e88, //vcgt.f32 d18, d17, d8
1785 0xf35421b3, //vbsl d18, d20, d19
1786 0xf2611da2, //vsub.f32 d17, d17, d18
1787 0xf3c72e1f, //vmov.i8 d18, #255
1788 0xf2e119c9, //vmul.f32 d17, d17, d9[0]
1789 0xf2611d21, //vsub.f32 d17, d1, d17
1790 0xf2611da0, //vsub.f32 d17, d17, d16
1791 0xf26008a2, //vadd.i32 d16, d16, d18
1792 0xf3f91721, //vabs.f32 d17, d17
1793 0xf2211fa0, //vmin.f32 d1, d17, d16
1794 0xecbd8b04, //vpop {d8-d9}
1795 0xe12fff1c, //bx ip
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05001796 0xe320f000, //nop {0}
1797 0x3f800000, //.word 0x3f800000
1798 0x3f800000, //.word 0x3f800000
Mike Klein894d5612017-03-07 07:59:52 -05001799};
1800
Mike Kleine9ed07d2017-03-07 12:28:11 -05001801CODE const uint32_t sk_luminance_to_alpha_vfp4[] = {
1802 0xed2d8b02, //vpush {d8}
1803 0xed923a22, //vldr s6, [r2, #136]
1804 0xe2823090, //add r3, r2, #144
1805 0xed928a23, //vldr s16, [r2, #140]
1806 0xf2e01943, //vmul.f32 d17, d0, d3[0]
1807 0xf2e10948, //vmul.f32 d16, d1, d8[0]
1808 0xf2800010, //vmov.i32 d0, #0
1809 0xf2801010, //vmov.i32 d1, #0
1810 0xf2013da0, //vadd.f32 d3, d17, d16
1811 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1812 0xf2003c92, //vfma.f32 d3, d16, d2
1813 0xe4913004, //ldr r3, [r1], #4
1814 0xf2802010, //vmov.i32 d2, #0
1815 0xecbd8b02, //vpop {d8}
1816 0xe12fff13, //bx r3
1817};
1818
Mike Klein894d5612017-03-07 07:59:52 -05001819CODE const uint32_t sk_matrix_2x3_vfp4[] = {
1820 0xe92d4800, //push {fp, lr}
1821 0xe591e000, //ldr lr, [r1]
1822 0xe591c004, //ldr ip, [r1, #4]
1823 0xe2811008, //add r1, r1, #8
1824 0xe28e300c, //add r3, lr, #12
1825 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1826 0xe28e3008, //add r3, lr, #8
1827 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1828 0xe28e3010, //add r3, lr, #16
1829 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1830 0xe28e3014, //add r3, lr, #20
1831 0xf2410c31, //vfma.f32 d16, d1, d17
1832 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1833 0xe28e3004, //add r3, lr, #4
1834 0xf2411c32, //vfma.f32 d17, d1, d18
1835 0xf4ee2c9f, //vld1.32 {d18[]}, [lr :32]
1836 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1837 0xf2400c32, //vfma.f32 d16, d0, d18
1838 0xf2401c33, //vfma.f32 d17, d0, d19
1839 0xf22001b0, //vorr d0, d16, d16
1840 0xf22111b1, //vorr d1, d17, d17
1841 0xe8bd4800, //pop {fp, lr}
1842 0xe12fff1c, //bx ip
1843};
1844
1845CODE const uint32_t sk_matrix_3x4_vfp4[] = {
1846 0xe92d4800, //push {fp, lr}
1847 0xe591e000, //ldr lr, [r1]
1848 0xe591c004, //ldr ip, [r1, #4]
1849 0xe2811008, //add r1, r1, #8
1850 0xe28e3020, //add r3, lr, #32
1851 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1852 0xe28e302c, //add r3, lr, #44
1853 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1854 0xe28e301c, //add r3, lr, #28
1855 0xf2420c33, //vfma.f32 d16, d2, d19
1856 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1857 0xe28e3018, //add r3, lr, #24
1858 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1859 0xe28e3024, //add r3, lr, #36
1860 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1861 0xe28e3028, //add r3, lr, #40
1862 0xf2421c32, //vfma.f32 d17, d2, d18
1863 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1864 0xe28e3010, //add r3, lr, #16
1865 0xf2422c34, //vfma.f32 d18, d2, d20
1866 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1867 0xe28e300c, //add r3, lr, #12
1868 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1869 0xe28e3014, //add r3, lr, #20
1870 0xf2411c34, //vfma.f32 d17, d1, d20
1871 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1872 0xf2410c34, //vfma.f32 d16, d1, d20
1873 0xe28e3004, //add r3, lr, #4
1874 0xf2412c33, //vfma.f32 d18, d1, d19
1875 0xf4ee3c9f, //vld1.32 {d19[]}, [lr :32]
1876 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1877 0xe28e3008, //add r3, lr, #8
1878 0xf2401c33, //vfma.f32 d17, d0, d19
1879 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1880 0xf2400c33, //vfma.f32 d16, d0, d19
1881 0xf2402c34, //vfma.f32 d18, d0, d20
1882 0xf22101b1, //vorr d0, d17, d17
1883 0xf22021b0, //vorr d2, d16, d16
1884 0xf22211b2, //vorr d1, d18, d18
1885 0xe8bd4800, //pop {fp, lr}
1886 0xe12fff1c, //bx ip
1887};
1888
Mike Kleine9ed07d2017-03-07 12:28:11 -05001889CODE const uint32_t sk_matrix_4x5_vfp4[] = {
1890 0xe92d4800, //push {fp, lr}
1891 0xe591e000, //ldr lr, [r1]
1892 0xf2630113, //vorr d16, d3, d3
1893 0xf2621112, //vorr d17, d2, d2
1894 0xe591c004, //ldr ip, [r1, #4]
1895 0xe28e301c, //add r3, lr, #28
1896 0xe2811008, //add r1, r1, #8
1897 0xf4ee4c9f, //vld1.32 {d20[]}, [lr :32]
1898 0xf4e35c9f, //vld1.32 {d21[]}, [r3 :32]
1899 0xe28e302c, //add r3, lr, #44
1900 0xf4e36c9f, //vld1.32 {d22[]}, [r3 :32]
1901 0xe28e303c, //add r3, lr, #60
1902 0xf4e37c9f, //vld1.32 {d23[]}, [r3 :32]
1903 0xe28e304c, //add r3, lr, #76
1904 0xf4a33c9f, //vld1.32 {d3[]}, [r3 :32]
1905 0xe28e3038, //add r3, lr, #56
1906 0xf2003cb7, //vfma.f32 d3, d16, d23
1907 0xf4e38c9f, //vld1.32 {d24[]}, [r3 :32]
1908 0xe28e3048, //add r3, lr, #72
1909 0xf4a32c9f, //vld1.32 {d2[]}, [r3 :32]
1910 0xe28e3034, //add r3, lr, #52
1911 0xf2002cb8, //vfma.f32 d2, d16, d24
1912 0xf4e39c9f, //vld1.32 {d25[]}, [r3 :32]
1913 0xe28e3030, //add r3, lr, #48
1914 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1915 0xe28e3040, //add r3, lr, #64
1916 0xf2013cb6, //vfma.f32 d3, d17, d22
1917 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1918 0xe28e3044, //add r3, lr, #68
1919 0xf2402cb3, //vfma.f32 d18, d16, d19
1920 0xf4e33c9f, //vld1.32 {d19[]}, [r3 :32]
1921 0xe28e3024, //add r3, lr, #36
1922 0xf2403cb9, //vfma.f32 d19, d16, d25
1923 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1924 0xe28e3020, //add r3, lr, #32
1925 0xf4e37c9f, //vld1.32 {d23[]}, [r3 :32]
1926 0xe28e3028, //add r3, lr, #40
1927 0xf2013c35, //vfma.f32 d3, d1, d21
1928 0xf2412cb7, //vfma.f32 d18, d17, d23
1929 0xf4e37c9f, //vld1.32 {d23[]}, [r3 :32]
1930 0xe28e3014, //add r3, lr, #20
1931 0xf2012cb7, //vfma.f32 d2, d17, d23
1932 0xf2413cb0, //vfma.f32 d19, d17, d16
1933 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1934 0xe28e3010, //add r3, lr, #16
1935 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1936 0xe28e3018, //add r3, lr, #24
1937 0xf2412c31, //vfma.f32 d18, d1, d17
1938 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1939 0xe28e3008, //add r3, lr, #8
1940 0xf2012c31, //vfma.f32 d2, d1, d17
1941 0xf2413c30, //vfma.f32 d19, d1, d16
1942 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1943 0xe28e3004, //add r3, lr, #4
1944 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1945 0xe28e300c, //add r3, lr, #12
1946 0xf2402c34, //vfma.f32 d18, d0, d20
1947 0xf2002c30, //vfma.f32 d2, d0, d16
1948 0xf2403c31, //vfma.f32 d19, d0, d17
1949 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1950 0xf2003c31, //vfma.f32 d3, d0, d17
1951 0xf22201b2, //vorr d0, d18, d18
1952 0xf22311b3, //vorr d1, d19, d19
1953 0xe8bd4800, //pop {fp, lr}
1954 0xe12fff1c, //bx ip
1955};
1956
Mike Klein894d5612017-03-07 07:59:52 -05001957CODE const uint32_t sk_matrix_perspective_vfp4[] = {
1958 0xe92d4800, //push {fp, lr}
1959 0xe591e000, //ldr lr, [r1]
1960 0xe591c004, //ldr ip, [r1, #4]
1961 0xe2811008, //add r1, r1, #8
1962 0xe28e301c, //add r3, lr, #28
1963 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1964 0xe28e3020, //add r3, lr, #32
1965 0xf4e31c9f, //vld1.32 {d17[]}, [r3 :32]
1966 0xe28e3018, //add r3, lr, #24
1967 0xf2411c30, //vfma.f32 d17, d1, d16
1968 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1969 0xe28e3010, //add r3, lr, #16
1970 0xf2401c30, //vfma.f32 d17, d0, d16
1971 0xf4e30c9f, //vld1.32 {d16[]}, [r3 :32]
1972 0xe28e3004, //add r3, lr, #4
1973 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1974 0xe28e3008, //add r3, lr, #8
1975 0xf4e34c9f, //vld1.32 {d20[]}, [r3 :32]
1976 0xe28e3014, //add r3, lr, #20
1977 0xf2414c32, //vfma.f32 d20, d1, d18
1978 0xf4e32c9f, //vld1.32 {d18[]}, [r3 :32]
1979 0xe28e300c, //add r3, lr, #12
1980 0xf3fb3521, //vrecpe.f32 d19, d17
1981 0xf2412c30, //vfma.f32 d18, d1, d16
1982 0xf4e35c9f, //vld1.32 {d21[]}, [r3 :32]
1983 0xf2410fb3, //vrecps.f32 d16, d17, d19
1984 0xf4ee1c9f, //vld1.32 {d17[]}, [lr :32]
1985 0xf2404c31, //vfma.f32 d20, d0, d17
1986 0xf2402c35, //vfma.f32 d18, d0, d21
1987 0xf3430db0, //vmul.f32 d16, d19, d16
1988 0xf3040db0, //vmul.f32 d0, d20, d16
1989 0xf3021db0, //vmul.f32 d1, d18, d16
1990 0xe8bd4800, //pop {fp, lr}
1991 0xe12fff1c, //bx ip
1992};
1993
1994CODE const uint32_t sk_linear_gradient_2stops_vfp4[] = {
1995 0xe8911008, //ldm r1, {r3, ip}
1996 0xe2811008, //add r1, r1, #8
1997 0xf4632a0d, //vld1.8 {d18-d19}, [r3]!
1998 0xf4634a0f, //vld1.8 {d20-d21}, [r3]
1999 0xf3f40c22, //vdup.32 d16, d18[0]
2000 0xf3f41c24, //vdup.32 d17, d20[0]
2001 0xf2400c31, //vfma.f32 d16, d0, d17
2002 0xf3fc6c24, //vdup.32 d22, d20[1]
2003 0xf3bc1c22, //vdup.32 d1, d18[1]
2004 0xf3b42c23, //vdup.32 d2, d19[0]
2005 0xf2001c36, //vfma.f32 d1, d0, d22
2006 0xf3f41c25, //vdup.32 d17, d21[0]
2007 0xf3fc4c25, //vdup.32 d20, d21[1]
2008 0xf2002c31, //vfma.f32 d2, d0, d17
2009 0xf3bc3c23, //vdup.32 d3, d19[1]
2010 0xf2003c34, //vfma.f32 d3, d0, d20
2011 0xf22001b0, //vorr d0, d16, d16
2012 0xe12fff1c, //bx ip
2013};
2014#elif defined(__x86_64__)
2015
2016CODE const uint8_t sk_start_pipeline_hsw[] = {
2017 65,87, //push %r15
2018 65,86, //push %r14
2019 65,85, //push %r13
2020 65,84, //push %r12
2021 83, //push %rbx
2022 73,137,205, //mov %rcx,%r13
2023 73,137,214, //mov %rdx,%r14
2024 72,137,251, //mov %rdi,%rbx
2025 72,173, //lods %ds:(%rsi),%rax
2026 73,137,199, //mov %rax,%r15
2027 73,137,244, //mov %rsi,%r12
2028 72,141,67,8, //lea 0x8(%rbx),%rax
2029 76,57,232, //cmp %r13,%rax
2030 118,5, //jbe 28 <_sk_start_pipeline_hsw+0x28>
2031 72,137,223, //mov %rbx,%rdi
2032 235,65, //jmp 69 <_sk_start_pipeline_hsw+0x69>
2033 185,0,0,0,0, //mov $0x0,%ecx
2034 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2035 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2036 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2037 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2038 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
2039 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
2040 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
2041 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
2042 72,137,223, //mov %rbx,%rdi
2043 76,137,230, //mov %r12,%rsi
2044 76,137,242, //mov %r14,%rdx
2045 65,255,215, //callq *%r15
2046 72,141,123,8, //lea 0x8(%rbx),%rdi
2047 72,131,195,16, //add $0x10,%rbx
2048 76,57,235, //cmp %r13,%rbx
2049 72,137,251, //mov %rdi,%rbx
2050 118,191, //jbe 28 <_sk_start_pipeline_hsw+0x28>
2051 76,137,233, //mov %r13,%rcx
2052 72,41,249, //sub %rdi,%rcx
2053 116,41, //je 9a <_sk_start_pipeline_hsw+0x9a>
2054 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2055 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2056 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2057 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2058 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
2059 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
2060 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
2061 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
2062 76,137,230, //mov %r12,%rsi
2063 76,137,242, //mov %r14,%rdx
2064 65,255,215, //callq *%r15
2065 76,137,232, //mov %r13,%rax
2066 91, //pop %rbx
2067 65,92, //pop %r12
2068 65,93, //pop %r13
2069 65,94, //pop %r14
2070 65,95, //pop %r15
2071 197,248,119, //vzeroupper
2072 195, //retq
2073};
2074
2075CODE const uint8_t sk_just_return_hsw[] = {
2076 195, //retq
2077};
2078
2079CODE const uint8_t sk_seed_shader_hsw[] = {
2080 72,173, //lods %ds:(%rsi),%rax
2081 197,249,110,199, //vmovd %edi,%xmm0
2082 196,226,125,24,192, //vbroadcastss %xmm0,%ymm0
2083 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002084 65,184,0,0,0,63, //mov $0x3f000000,%r8d
2085 196,193,121,110,200, //vmovd %r8d,%xmm1
2086 196,226,125,24,201, //vbroadcastss %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002087 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
2088 197,252,88,66,20, //vaddps 0x14(%rdx),%ymm0,%ymm0
2089 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
2090 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
2091 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002092 184,0,0,128,63, //mov $0x3f800000,%eax
2093 197,249,110,208, //vmovd %eax,%xmm2
2094 196,226,125,24,210, //vbroadcastss %xmm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002095 72,173, //lods %ds:(%rsi),%rax
2096 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2097 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
2098 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
2099 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
2100 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
2101 255,224, //jmpq *%rax
2102};
2103
2104CODE const uint8_t sk_constant_color_hsw[] = {
2105 72,173, //lods %ds:(%rsi),%rax
2106 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
2107 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
2108 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
2109 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
2110 72,173, //lods %ds:(%rsi),%rax
2111 255,224, //jmpq *%rax
2112};
2113
2114CODE const uint8_t sk_clear_hsw[] = {
2115 72,173, //lods %ds:(%rsi),%rax
2116 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2117 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2118 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2119 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
2120 255,224, //jmpq *%rax
2121};
2122
2123CODE const uint8_t sk_plus__hsw[] = {
2124 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
2125 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
2126 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
2127 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
2128 72,173, //lods %ds:(%rsi),%rax
2129 255,224, //jmpq *%rax
2130};
2131
2132CODE const uint8_t sk_srcover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002133 184,0,0,128,63, //mov $0x3f800000,%eax
2134 197,121,110,192, //vmovd %eax,%xmm8
2135 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002136 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
2137 196,194,93,184,192, //vfmadd231ps %ymm8,%ymm4,%ymm0
2138 196,194,85,184,200, //vfmadd231ps %ymm8,%ymm5,%ymm1
2139 196,194,77,184,208, //vfmadd231ps %ymm8,%ymm6,%ymm2
2140 196,194,69,184,216, //vfmadd231ps %ymm8,%ymm7,%ymm3
2141 72,173, //lods %ds:(%rsi),%rax
2142 255,224, //jmpq *%rax
2143};
2144
2145CODE const uint8_t sk_dstover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002146 184,0,0,128,63, //mov $0x3f800000,%eax
2147 197,121,110,192, //vmovd %eax,%xmm8
2148 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002149 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
2150 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2151 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
2152 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
2153 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
2154 72,173, //lods %ds:(%rsi),%rax
2155 255,224, //jmpq *%rax
2156};
2157
2158CODE const uint8_t sk_clamp_0_hsw[] = {
2159 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
2160 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
2161 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
2162 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
2163 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
2164 72,173, //lods %ds:(%rsi),%rax
2165 255,224, //jmpq *%rax
2166};
2167
2168CODE const uint8_t sk_clamp_1_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002169 184,0,0,128,63, //mov $0x3f800000,%eax
2170 197,121,110,192, //vmovd %eax,%xmm8
2171 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002172 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
2173 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
2174 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
2175 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
2176 72,173, //lods %ds:(%rsi),%rax
2177 255,224, //jmpq *%rax
2178};
2179
2180CODE const uint8_t sk_clamp_a_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002181 184,0,0,128,63, //mov $0x3f800000,%eax
2182 197,121,110,192, //vmovd %eax,%xmm8
2183 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002184 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
2185 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
2186 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
2187 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
2188 72,173, //lods %ds:(%rsi),%rax
2189 255,224, //jmpq *%rax
2190};
2191
2192CODE const uint8_t sk_set_rgb_hsw[] = {
2193 72,173, //lods %ds:(%rsi),%rax
2194 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
2195 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
2196 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
2197 72,173, //lods %ds:(%rsi),%rax
2198 255,224, //jmpq *%rax
2199};
2200
2201CODE const uint8_t sk_swap_rb_hsw[] = {
2202 197,124,40,192, //vmovaps %ymm0,%ymm8
2203 72,173, //lods %ds:(%rsi),%rax
2204 197,252,40,194, //vmovaps %ymm2,%ymm0
2205 197,124,41,194, //vmovaps %ymm8,%ymm2
2206 255,224, //jmpq *%rax
2207};
2208
2209CODE const uint8_t sk_swap_hsw[] = {
2210 197,124,40,195, //vmovaps %ymm3,%ymm8
2211 197,124,40,202, //vmovaps %ymm2,%ymm9
2212 197,124,40,209, //vmovaps %ymm1,%ymm10
2213 197,124,40,216, //vmovaps %ymm0,%ymm11
2214 72,173, //lods %ds:(%rsi),%rax
2215 197,252,40,196, //vmovaps %ymm4,%ymm0
2216 197,252,40,205, //vmovaps %ymm5,%ymm1
2217 197,252,40,214, //vmovaps %ymm6,%ymm2
2218 197,252,40,223, //vmovaps %ymm7,%ymm3
2219 197,124,41,220, //vmovaps %ymm11,%ymm4
2220 197,124,41,213, //vmovaps %ymm10,%ymm5
2221 197,124,41,206, //vmovaps %ymm9,%ymm6
2222 197,124,41,199, //vmovaps %ymm8,%ymm7
2223 255,224, //jmpq *%rax
2224};
2225
2226CODE const uint8_t sk_move_src_dst_hsw[] = {
2227 72,173, //lods %ds:(%rsi),%rax
2228 197,252,40,224, //vmovaps %ymm0,%ymm4
2229 197,252,40,233, //vmovaps %ymm1,%ymm5
2230 197,252,40,242, //vmovaps %ymm2,%ymm6
2231 197,252,40,251, //vmovaps %ymm3,%ymm7
2232 255,224, //jmpq *%rax
2233};
2234
2235CODE const uint8_t sk_move_dst_src_hsw[] = {
2236 72,173, //lods %ds:(%rsi),%rax
2237 197,252,40,196, //vmovaps %ymm4,%ymm0
2238 197,252,40,205, //vmovaps %ymm5,%ymm1
2239 197,252,40,214, //vmovaps %ymm6,%ymm2
2240 197,252,40,223, //vmovaps %ymm7,%ymm3
2241 255,224, //jmpq *%rax
2242};
2243
2244CODE const uint8_t sk_premul_hsw[] = {
2245 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
2246 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
2247 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
2248 72,173, //lods %ds:(%rsi),%rax
2249 255,224, //jmpq *%rax
2250};
2251
2252CODE const uint8_t sk_unpremul_hsw[] = {
2253 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
2254 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002255 184,0,0,128,63, //mov $0x3f800000,%eax
2256 197,121,110,208, //vmovd %eax,%xmm10
2257 196,66,125,24,210, //vbroadcastss %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05002258 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
2259 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
2260 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
2261 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
2262 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
2263 72,173, //lods %ds:(%rsi),%rax
2264 255,224, //jmpq *%rax
2265};
2266
2267CODE const uint8_t sk_from_srgb_hsw[] = {
2268 196,98,125,24,66,64, //vbroadcastss 0x40(%rdx),%ymm8
2269 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
2270 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
2271 196,98,125,24,90,60, //vbroadcastss 0x3c(%rdx),%ymm11
2272 196,98,125,24,98,56, //vbroadcastss 0x38(%rdx),%ymm12
2273 196,65,124,40,235, //vmovaps %ymm11,%ymm13
2274 196,66,125,168,236, //vfmadd213ps %ymm12,%ymm0,%ymm13
2275 196,98,125,24,114,52, //vbroadcastss 0x34(%rdx),%ymm14
2276 196,66,45,168,238, //vfmadd213ps %ymm14,%ymm10,%ymm13
2277 196,98,125,24,82,68, //vbroadcastss 0x44(%rdx),%ymm10
2278 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
2279 196,195,21,74,193,0, //vblendvps %ymm0,%ymm9,%ymm13,%ymm0
2280 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
2281 197,116,89,233, //vmulps %ymm1,%ymm1,%ymm13
2282 196,65,124,40,251, //vmovaps %ymm11,%ymm15
2283 196,66,117,168,252, //vfmadd213ps %ymm12,%ymm1,%ymm15
2284 196,66,21,168,254, //vfmadd213ps %ymm14,%ymm13,%ymm15
2285 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
2286 196,195,5,74,201,16, //vblendvps %ymm1,%ymm9,%ymm15,%ymm1
2287 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
2288 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
2289 196,66,109,168,220, //vfmadd213ps %ymm12,%ymm2,%ymm11
2290 196,66,53,168,222, //vfmadd213ps %ymm14,%ymm9,%ymm11
2291 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
2292 196,195,37,74,208,32, //vblendvps %ymm2,%ymm8,%ymm11,%ymm2
2293 72,173, //lods %ds:(%rsi),%rax
2294 255,224, //jmpq *%rax
2295};
2296
2297CODE const uint8_t sk_to_srgb_hsw[] = {
2298 197,124,82,192, //vrsqrtps %ymm0,%ymm8
2299 196,65,124,83,200, //vrcpps %ymm8,%ymm9
2300 196,65,124,82,208, //vrsqrtps %ymm8,%ymm10
2301 196,98,125,24,66,72, //vbroadcastss 0x48(%rdx),%ymm8
2302 197,60,89,216, //vmulps %ymm0,%ymm8,%ymm11
2303 196,98,125,24,34, //vbroadcastss (%rdx),%ymm12
2304 196,98,125,24,106,76, //vbroadcastss 0x4c(%rdx),%ymm13
2305 196,98,125,24,114,80, //vbroadcastss 0x50(%rdx),%ymm14
2306 196,98,125,24,122,84, //vbroadcastss 0x54(%rdx),%ymm15
2307 196,66,13,168,207, //vfmadd213ps %ymm15,%ymm14,%ymm9
2308 196,66,21,184,202, //vfmadd231ps %ymm10,%ymm13,%ymm9
2309 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
2310 196,98,125,24,82,88, //vbroadcastss 0x58(%rdx),%ymm10
2311 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
2312 196,195,53,74,195,0, //vblendvps %ymm0,%ymm11,%ymm9,%ymm0
2313 197,124,82,201, //vrsqrtps %ymm1,%ymm9
2314 196,65,124,83,217, //vrcpps %ymm9,%ymm11
2315 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
2316 196,66,13,168,223, //vfmadd213ps %ymm15,%ymm14,%ymm11
2317 196,66,21,184,217, //vfmadd231ps %ymm9,%ymm13,%ymm11
2318 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
2319 196,65,28,93,219, //vminps %ymm11,%ymm12,%ymm11
2320 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
2321 196,195,37,74,201,16, //vblendvps %ymm1,%ymm9,%ymm11,%ymm1
2322 197,124,82,202, //vrsqrtps %ymm2,%ymm9
2323 196,65,124,83,217, //vrcpps %ymm9,%ymm11
2324 196,66,13,168,223, //vfmadd213ps %ymm15,%ymm14,%ymm11
2325 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
2326 196,66,21,184,217, //vfmadd231ps %ymm9,%ymm13,%ymm11
2327 196,65,28,93,203, //vminps %ymm11,%ymm12,%ymm9
2328 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
2329 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
2330 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
2331 72,173, //lods %ds:(%rsi),%rax
2332 255,224, //jmpq *%rax
2333};
2334
2335CODE const uint8_t sk_scale_1_float_hsw[] = {
2336 72,173, //lods %ds:(%rsi),%rax
2337 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
2338 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
2339 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
2340 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
2341 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
2342 72,173, //lods %ds:(%rsi),%rax
2343 255,224, //jmpq *%rax
2344};
2345
2346CODE const uint8_t sk_scale_u8_hsw[] = {
2347 73,137,200, //mov %rcx,%r8
2348 72,173, //lods %ds:(%rsi),%rax
2349 72,139,0, //mov (%rax),%rax
2350 72,1,248, //add %rdi,%rax
2351 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002352 117,56, //jne 462 <_sk_scale_u8_hsw+0x48>
Mike Klein894d5612017-03-07 07:59:52 -05002353 197,123,16,0, //vmovsd (%rax),%xmm8
2354 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
2355 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002356 184,129,128,128,59, //mov $0x3b808081,%eax
2357 197,121,110,200, //vmovd %eax,%xmm9
2358 196,66,125,24,201, //vbroadcastss %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05002359 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
2360 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
2361 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
2362 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
2363 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
2364 72,173, //lods %ds:(%rsi),%rax
2365 76,137,193, //mov %r8,%rcx
2366 255,224, //jmpq *%rax
2367 49,201, //xor %ecx,%ecx
2368 77,137,194, //mov %r8,%r10
2369 69,49,201, //xor %r9d,%r9d
2370 68,15,182,24, //movzbl (%rax),%r11d
2371 72,255,192, //inc %rax
2372 73,211,227, //shl %cl,%r11
2373 77,9,217, //or %r11,%r9
2374 72,131,193,8, //add $0x8,%rcx
2375 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002376 117,234, //jne 46a <_sk_scale_u8_hsw+0x50>
Mike Klein894d5612017-03-07 07:59:52 -05002377 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002378 235,167, //jmp 42e <_sk_scale_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05002379};
2380
2381CODE const uint8_t sk_lerp_1_float_hsw[] = {
2382 72,173, //lods %ds:(%rsi),%rax
2383 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
2384 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
2385 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2386 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
2387 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
2388 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
2389 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
2390 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
2391 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
2392 72,173, //lods %ds:(%rsi),%rax
2393 255,224, //jmpq *%rax
2394};
2395
2396CODE const uint8_t sk_lerp_u8_hsw[] = {
2397 73,137,200, //mov %rcx,%r8
2398 72,173, //lods %ds:(%rsi),%rax
2399 72,139,0, //mov (%rax),%rax
2400 72,1,248, //add %rdi,%rax
2401 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002402 117,76, //jne 512 <_sk_lerp_u8_hsw+0x5c>
Mike Klein894d5612017-03-07 07:59:52 -05002403 197,123,16,0, //vmovsd (%rax),%xmm8
2404 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
2405 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002406 184,129,128,128,59, //mov $0x3b808081,%eax
2407 197,121,110,200, //vmovd %eax,%xmm9
2408 196,66,125,24,201, //vbroadcastss %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05002409 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
2410 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
2411 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2412 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
2413 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
2414 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
2415 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
2416 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
2417 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
2418 72,173, //lods %ds:(%rsi),%rax
2419 76,137,193, //mov %r8,%rcx
2420 255,224, //jmpq *%rax
2421 49,201, //xor %ecx,%ecx
2422 77,137,194, //mov %r8,%r10
2423 69,49,201, //xor %r9d,%r9d
2424 68,15,182,24, //movzbl (%rax),%r11d
2425 72,255,192, //inc %rax
2426 73,211,227, //shl %cl,%r11
2427 77,9,217, //or %r11,%r9
2428 72,131,193,8, //add $0x8,%rcx
2429 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002430 117,234, //jne 51a <_sk_lerp_u8_hsw+0x64>
Mike Klein894d5612017-03-07 07:59:52 -05002431 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002432 235,147, //jmp 4ca <_sk_lerp_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05002433};
2434
2435CODE const uint8_t sk_lerp_565_hsw[] = {
2436 72,173, //lods %ds:(%rsi),%rax
2437 76,139,16, //mov (%rax),%r10
2438 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002439 15,133,132,0,0,0, //jne 5c9 <_sk_lerp_565_hsw+0x92>
Mike Klein894d5612017-03-07 07:59:52 -05002440 196,193,122,111,28,122, //vmovdqu (%r10,%rdi,2),%xmm3
2441 196,226,125,51,219, //vpmovzxwd %xmm3,%ymm3
2442 196,98,125,88,66,104, //vpbroadcastd 0x68(%rdx),%ymm8
2443 197,61,219,195, //vpand %ymm3,%ymm8,%ymm8
2444 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
2445 196,98,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm9
2446 196,65,52,89,192, //vmulps %ymm8,%ymm9,%ymm8
2447 196,98,125,88,74,108, //vpbroadcastd 0x6c(%rdx),%ymm9
2448 197,53,219,203, //vpand %ymm3,%ymm9,%ymm9
2449 196,65,124,91,201, //vcvtdq2ps %ymm9,%ymm9
2450 196,98,125,24,82,120, //vbroadcastss 0x78(%rdx),%ymm10
2451 196,65,44,89,201, //vmulps %ymm9,%ymm10,%ymm9
2452 196,98,125,88,82,112, //vpbroadcastd 0x70(%rdx),%ymm10
2453 197,173,219,219, //vpand %ymm3,%ymm10,%ymm3
2454 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
2455 196,98,125,24,82,124, //vbroadcastss 0x7c(%rdx),%ymm10
2456 197,172,89,219, //vmulps %ymm3,%ymm10,%ymm3
2457 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
2458 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
2459 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
2460 196,226,53,168,205, //vfmadd213ps %ymm5,%ymm9,%ymm1
2461 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
2462 196,226,101,168,214, //vfmadd213ps %ymm6,%ymm3,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002463 184,0,0,128,63, //mov $0x3f800000,%eax
2464 197,249,110,216, //vmovd %eax,%xmm3
2465 196,226,125,24,219, //vbroadcastss %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05002466 72,173, //lods %ds:(%rsi),%rax
2467 255,224, //jmpq *%rax
2468 65,137,200, //mov %ecx,%r8d
2469 65,128,224,7, //and $0x7,%r8b
2470 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
2471 65,254,200, //dec %r8b
2472 69,15,182,192, //movzbl %r8b,%r8d
2473 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002474 15,135,102,255,255,255, //ja 54b <_sk_lerp_565_hsw+0x14>
2475 76,141,13,76,0,0,0, //lea 0x4c(%rip),%r9 # 638 <_sk_lerp_565_hsw+0x101>
Mike Klein894d5612017-03-07 07:59:52 -05002476 75,99,4,129, //movslq (%r9,%r8,4),%rax
2477 76,1,200, //add %r9,%rax
2478 255,224, //jmpq *%rax
2479 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
2480 196,193,97,196,92,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm3
2481 196,193,97,196,92,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm3,%xmm3
2482 196,193,97,196,92,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm3,%xmm3
2483 196,193,97,196,92,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm3,%xmm3
2484 196,193,97,196,92,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
2485 196,193,97,196,92,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
2486 196,193,97,196,28,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002487 233,22,255,255,255, //jmpq 54b <_sk_lerp_565_hsw+0x14>
2488 15,31,0, //nopl (%rax)
2489 241, //icebp
Mike Klein894d5612017-03-07 07:59:52 -05002490 255, //(bad)
2491 255, //(bad)
2492 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002493 233,255,255,255,225, //jmpq ffffffffe2000640 <_sk_linear_gradient_2stops_hsw+0xffffffffe1fff55e>
Mike Klein894d5612017-03-07 07:59:52 -05002494 255, //(bad)
2495 255, //(bad)
2496 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002497 217,255, //fcos
Mike Klein894d5612017-03-07 07:59:52 -05002498 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002499 255,209, //callq *%rcx
Mike Klein894d5612017-03-07 07:59:52 -05002500 255, //(bad)
2501 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002502 255,201, //dec %ecx
Mike Klein894d5612017-03-07 07:59:52 -05002503 255, //(bad)
2504 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002505 255, //(bad)
2506 189, //.byte 0xbd
Mike Klein894d5612017-03-07 07:59:52 -05002507 255, //(bad)
2508 255, //(bad)
2509 255, //.byte 0xff
2510};
2511
2512CODE const uint8_t sk_load_tables_hsw[] = {
2513 73,137,200, //mov %rcx,%r8
2514 72,173, //lods %ds:(%rsi),%rax
2515 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
2516 76,3,8, //add (%rax),%r9
2517 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002518 117,106, //jne 6d3 <_sk_load_tables_hsw+0x7f>
Mike Klein894d5612017-03-07 07:59:52 -05002519 196,193,126,111,25, //vmovdqu (%r9),%ymm3
2520 196,226,125,88,82,16, //vpbroadcastd 0x10(%rdx),%ymm2
2521 197,237,219,203, //vpand %ymm3,%ymm2,%ymm1
2522 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
2523 72,139,72,8, //mov 0x8(%rax),%rcx
2524 76,139,72,16, //mov 0x10(%rax),%r9
2525 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
2526 196,226,53,146,4,137, //vgatherdps %ymm9,(%rcx,%ymm1,4),%ymm0
2527 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
2528 197,109,219,201, //vpand %ymm1,%ymm2,%ymm9
2529 196,65,45,118,210, //vpcmpeqd %ymm10,%ymm10,%ymm10
2530 196,130,45,146,12,137, //vgatherdps %ymm10,(%r9,%ymm9,4),%ymm1
2531 72,139,64,24, //mov 0x18(%rax),%rax
2532 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
2533 196,65,109,219,201, //vpand %ymm9,%ymm2,%ymm9
2534 196,162,61,146,20,136, //vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
2535 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
2536 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
2537 196,98,125,24,66,12, //vbroadcastss 0xc(%rdx),%ymm8
2538 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
2539 72,173, //lods %ds:(%rsi),%rax
2540 76,137,193, //mov %r8,%rcx
2541 255,224, //jmpq *%rax
2542 185,8,0,0,0, //mov $0x8,%ecx
2543 68,41,193, //sub %r8d,%ecx
2544 192,225,3, //shl $0x3,%cl
2545 73,199,194,255,255,255,255, //mov $0xffffffffffffffff,%r10
2546 73,211,234, //shr %cl,%r10
2547 196,193,249,110,194, //vmovq %r10,%xmm0
2548 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
2549 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002550 233,114,255,255,255, //jmpq 66e <_sk_load_tables_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05002551};
2552
2553CODE const uint8_t sk_load_a8_hsw[] = {
2554 73,137,200, //mov %rcx,%r8
2555 72,173, //lods %ds:(%rsi),%rax
2556 72,139,0, //mov (%rax),%rax
2557 72,1,248, //add %rdi,%rax
2558 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002559 117,42, //jne 736 <_sk_load_a8_hsw+0x3a>
Mike Klein894d5612017-03-07 07:59:52 -05002560 197,251,16,0, //vmovsd (%rax),%xmm0
2561 196,226,125,49,192, //vpmovzxbd %xmm0,%ymm0
2562 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
2563 196,226,125,24,74,12, //vbroadcastss 0xc(%rdx),%ymm1
2564 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
2565 72,173, //lods %ds:(%rsi),%rax
2566 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
2567 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
2568 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
2569 76,137,193, //mov %r8,%rcx
2570 255,224, //jmpq *%rax
2571 49,201, //xor %ecx,%ecx
2572 77,137,194, //mov %r8,%r10
2573 69,49,201, //xor %r9d,%r9d
2574 68,15,182,24, //movzbl (%rax),%r11d
2575 72,255,192, //inc %rax
2576 73,211,227, //shl %cl,%r11
2577 77,9,217, //or %r11,%r9
2578 72,131,193,8, //add $0x8,%rcx
2579 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002580 117,234, //jne 73e <_sk_load_a8_hsw+0x42>
Mike Klein894d5612017-03-07 07:59:52 -05002581 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002582 235,181, //jmp 710 <_sk_load_a8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05002583};
2584
2585CODE const uint8_t sk_store_a8_hsw[] = {
2586 72,173, //lods %ds:(%rsi),%rax
2587 76,139,8, //mov (%rax),%r9
2588 196,98,125,24,66,8, //vbroadcastss 0x8(%rdx),%ymm8
2589 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
2590 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
2591 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
2592 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
2593 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
2594 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002595 117,10, //jne 78e <_sk_store_a8_hsw+0x33>
Mike Klein894d5612017-03-07 07:59:52 -05002596 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
2597 72,173, //lods %ds:(%rsi),%rax
2598 255,224, //jmpq *%rax
2599 137,200, //mov %ecx,%eax
2600 36,7, //and $0x7,%al
2601 254,200, //dec %al
2602 68,15,182,192, //movzbl %al,%r8d
2603 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002604 119,236, //ja 78a <_sk_store_a8_hsw+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05002605 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002606 76,141,21,66,0,0,0, //lea 0x42(%rip),%r10 # 7ec <_sk_store_a8_hsw+0x91>
Mike Klein894d5612017-03-07 07:59:52 -05002607 75,99,4,130, //movslq (%r10,%r8,4),%rax
2608 76,1,208, //add %r10,%rax
2609 255,224, //jmpq *%rax
2610 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
2611 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
2612 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
2613 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
2614 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
2615 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
2616 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002617 235,158, //jmp 78a <_sk_store_a8_hsw+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05002618 247,255, //idiv %edi
2619 255, //(bad)
2620 255, //(bad)
2621 239, //out %eax,(%dx)
2622 255, //(bad)
2623 255, //(bad)
2624 255,231, //jmpq *%rdi
2625 255, //(bad)
2626 255, //(bad)
2627 255, //(bad)
2628 223,255, //(bad)
2629 255, //(bad)
2630 255,215, //callq *%rdi
2631 255, //(bad)
2632 255, //(bad)
2633 255,207, //dec %edi
2634 255, //(bad)
2635 255, //(bad)
2636 255,199, //inc %edi
2637 255, //(bad)
2638 255, //(bad)
2639 255, //.byte 0xff
2640};
2641
2642CODE const uint8_t sk_load_565_hsw[] = {
2643 72,173, //lods %ds:(%rsi),%rax
2644 76,139,16, //mov (%rax),%r10
2645 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002646 117,92, //jne 86e <_sk_load_565_hsw+0x66>
Mike Klein894d5612017-03-07 07:59:52 -05002647 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
2648 196,226,125,51,208, //vpmovzxwd %xmm0,%ymm2
2649 196,226,125,88,66,104, //vpbroadcastd 0x68(%rdx),%ymm0
2650 197,253,219,194, //vpand %ymm2,%ymm0,%ymm0
2651 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
2652 196,226,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm1
2653 197,244,89,192, //vmulps %ymm0,%ymm1,%ymm0
2654 196,226,125,88,74,108, //vpbroadcastd 0x6c(%rdx),%ymm1
2655 197,245,219,202, //vpand %ymm2,%ymm1,%ymm1
2656 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
2657 196,226,125,24,90,120, //vbroadcastss 0x78(%rdx),%ymm3
2658 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
2659 196,226,125,88,90,112, //vpbroadcastd 0x70(%rdx),%ymm3
2660 197,229,219,210, //vpand %ymm2,%ymm3,%ymm2
2661 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
2662 196,226,125,24,90,124, //vbroadcastss 0x7c(%rdx),%ymm3
2663 197,228,89,210, //vmulps %ymm2,%ymm3,%ymm2
2664 196,226,125,24,26, //vbroadcastss (%rdx),%ymm3
2665 72,173, //lods %ds:(%rsi),%rax
2666 255,224, //jmpq *%rax
2667 65,137,200, //mov %ecx,%r8d
2668 65,128,224,7, //and $0x7,%r8b
2669 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
2670 65,254,200, //dec %r8b
2671 69,15,182,192, //movzbl %r8b,%r8d
2672 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002673 119,146, //ja 818 <_sk_load_565_hsw+0x10>
2674 76,141,13,75,0,0,0, //lea 0x4b(%rip),%r9 # 8d8 <_sk_load_565_hsw+0xd0>
Mike Klein894d5612017-03-07 07:59:52 -05002675 75,99,4,129, //movslq (%r9,%r8,4),%rax
2676 76,1,200, //add %r9,%rax
2677 255,224, //jmpq *%rax
2678 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
2679 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
2680 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
2681 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
2682 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
2683 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
2684 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
2685 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002686 233,66,255,255,255, //jmpq 818 <_sk_load_565_hsw+0x10>
Mike Klein894d5612017-03-07 07:59:52 -05002687 102,144, //xchg %ax,%ax
2688 242,255, //repnz (bad)
2689 255, //(bad)
2690 255, //(bad)
2691 234, //(bad)
2692 255, //(bad)
2693 255, //(bad)
2694 255,226, //jmpq *%rdx
2695 255, //(bad)
2696 255, //(bad)
2697 255, //(bad)
2698 218,255, //(bad)
2699 255, //(bad)
2700 255,210, //callq *%rdx
2701 255, //(bad)
2702 255, //(bad)
2703 255,202, //dec %edx
2704 255, //(bad)
2705 255, //(bad)
2706 255, //(bad)
2707 190, //.byte 0xbe
2708 255, //(bad)
2709 255, //(bad)
2710 255, //.byte 0xff
2711};
2712
2713CODE const uint8_t sk_store_565_hsw[] = {
2714 72,173, //lods %ds:(%rsi),%rax
2715 76,139,8, //mov (%rax),%r9
2716 196,98,125,24,130,128,0,0,0, //vbroadcastss 0x80(%rdx),%ymm8
2717 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
2718 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
2719 196,193,53,114,241,11, //vpslld $0xb,%ymm9,%ymm9
2720 196,98,125,24,146,132,0,0,0, //vbroadcastss 0x84(%rdx),%ymm10
2721 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
2722 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
2723 196,193,45,114,242,5, //vpslld $0x5,%ymm10,%ymm10
2724 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
2725 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
2726 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
2727 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
2728 196,67,125,57,193,1, //vextracti128 $0x1,%ymm8,%xmm9
2729 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
2730 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002731 117,10, //jne 956 <_sk_store_565_hsw+0x62>
Mike Klein894d5612017-03-07 07:59:52 -05002732 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
2733 72,173, //lods %ds:(%rsi),%rax
2734 255,224, //jmpq *%rax
2735 137,200, //mov %ecx,%eax
2736 36,7, //and $0x7,%al
2737 254,200, //dec %al
2738 68,15,182,192, //movzbl %al,%r8d
2739 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002740 119,236, //ja 952 <_sk_store_565_hsw+0x5e>
2741 76,141,21,71,0,0,0, //lea 0x47(%rip),%r10 # 9b4 <_sk_store_565_hsw+0xc0>
Mike Klein894d5612017-03-07 07:59:52 -05002742 75,99,4,130, //movslq (%r10,%r8,4),%rax
2743 76,1,208, //add %r10,%rax
2744 255,224, //jmpq *%rax
2745 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
2746 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
2747 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
2748 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
2749 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
2750 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
2751 197,121,126,192, //vmovd %xmm8,%eax
2752 102,65,137,4,121, //mov %ax,(%r9,%rdi,2)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002753 235,161, //jmp 952 <_sk_store_565_hsw+0x5e>
Mike Klein894d5612017-03-07 07:59:52 -05002754 15,31,0, //nopl (%rax)
2755 242,255, //repnz (bad)
2756 255, //(bad)
2757 255, //(bad)
2758 234, //(bad)
2759 255, //(bad)
2760 255, //(bad)
2761 255,226, //jmpq *%rdx
2762 255, //(bad)
2763 255, //(bad)
2764 255, //(bad)
2765 218,255, //(bad)
2766 255, //(bad)
2767 255,210, //callq *%rdx
2768 255, //(bad)
2769 255, //(bad)
2770 255,202, //dec %edx
2771 255, //(bad)
2772 255, //(bad)
2773 255,194, //inc %edx
2774 255, //(bad)
2775 255, //(bad)
2776 255, //.byte 0xff
2777};
2778
2779CODE const uint8_t sk_load_8888_hsw[] = {
2780 73,137,200, //mov %rcx,%r8
2781 72,173, //lods %ds:(%rsi),%rax
2782 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
2783 76,3,8, //add (%rax),%r9
2784 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002785 117,104, //jne a4d <_sk_load_8888_hsw+0x7d>
Mike Klein894d5612017-03-07 07:59:52 -05002786 196,193,126,111,25, //vmovdqu (%r9),%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002787 184,255,0,0,0, //mov $0xff,%eax
2788 197,249,110,192, //vmovd %eax,%xmm0
2789 196,226,125,88,208, //vpbroadcastd %xmm0,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002790 197,237,219,195, //vpand %ymm3,%ymm2,%ymm0
2791 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002792 184,129,128,128,59, //mov $0x3b808081,%eax
2793 197,249,110,200, //vmovd %eax,%xmm1
2794 196,98,125,24,193, //vbroadcastss %xmm1,%ymm8
2795 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05002796 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
2797 197,237,219,201, //vpand %ymm1,%ymm2,%ymm1
2798 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002799 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05002800 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
2801 196,193,109,219,209, //vpand %ymm9,%ymm2,%ymm2
2802 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002803 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05002804 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
2805 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
2806 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
2807 72,173, //lods %ds:(%rsi),%rax
2808 76,137,193, //mov %r8,%rcx
2809 255,224, //jmpq *%rax
2810 185,8,0,0,0, //mov $0x8,%ecx
2811 68,41,193, //sub %r8d,%ecx
2812 192,225,3, //shl $0x3,%cl
2813 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
2814 72,211,232, //shr %cl,%rax
2815 196,225,249,110,192, //vmovq %rax,%xmm0
2816 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
2817 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002818 233,116,255,255,255, //jmpq 9ea <_sk_load_8888_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05002819};
2820
2821CODE const uint8_t sk_store_8888_hsw[] = {
2822 73,137,200, //mov %rcx,%r8
2823 72,173, //lods %ds:(%rsi),%rax
2824 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
2825 76,3,8, //add (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002826 184,0,0,127,67, //mov $0x437f0000,%eax
2827 197,121,110,192, //vmovd %eax,%xmm8
2828 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05002829 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
2830 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
2831 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
2832 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
2833 196,193,45,114,242,8, //vpslld $0x8,%ymm10,%ymm10
2834 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
2835 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
2836 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
2837 196,193,45,114,242,16, //vpslld $0x10,%ymm10,%ymm10
2838 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
2839 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
2840 196,193,61,114,240,24, //vpslld $0x18,%ymm8,%ymm8
2841 196,65,45,235,192, //vpor %ymm8,%ymm10,%ymm8
2842 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
2843 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002844 117,12, //jne aea <_sk_store_8888_hsw+0x74>
Mike Klein894d5612017-03-07 07:59:52 -05002845 196,65,126,127,1, //vmovdqu %ymm8,(%r9)
2846 72,173, //lods %ds:(%rsi),%rax
2847 76,137,193, //mov %r8,%rcx
2848 255,224, //jmpq *%rax
2849 185,8,0,0,0, //mov $0x8,%ecx
2850 68,41,193, //sub %r8d,%ecx
2851 192,225,3, //shl $0x3,%cl
2852 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
2853 72,211,232, //shr %cl,%rax
2854 196,97,249,110,200, //vmovq %rax,%xmm9
2855 196,66,125,33,201, //vpmovsxbd %xmm9,%ymm9
2856 196,66,53,142,1, //vpmaskmovd %ymm8,%ymm9,(%r9)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002857 235,211, //jmp ae3 <_sk_store_8888_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05002858};
2859
2860CODE const uint8_t sk_load_f16_hsw[] = {
2861 72,173, //lods %ds:(%rsi),%rax
2862 72,139,0, //mov (%rax),%rax
2863 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002864 117,97, //jne b7b <_sk_load_f16_hsw+0x6b>
Mike Klein894d5612017-03-07 07:59:52 -05002865 197,249,16,12,248, //vmovupd (%rax,%rdi,8),%xmm1
2866 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
2867 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
2868 197,121,16,68,248,48, //vmovupd 0x30(%rax,%rdi,8),%xmm8
2869 197,241,97,194, //vpunpcklwd %xmm2,%xmm1,%xmm0
2870 197,241,105,202, //vpunpckhwd %xmm2,%xmm1,%xmm1
2871 196,193,97,97,208, //vpunpcklwd %xmm8,%xmm3,%xmm2
2872 196,193,97,105,216, //vpunpckhwd %xmm8,%xmm3,%xmm3
2873 197,121,97,193, //vpunpcklwd %xmm1,%xmm0,%xmm8
2874 197,121,105,201, //vpunpckhwd %xmm1,%xmm0,%xmm9
2875 197,233,97,203, //vpunpcklwd %xmm3,%xmm2,%xmm1
2876 197,233,105,219, //vpunpckhwd %xmm3,%xmm2,%xmm3
2877 197,185,108,193, //vpunpcklqdq %xmm1,%xmm8,%xmm0
2878 196,226,125,19,192, //vcvtph2ps %xmm0,%ymm0
2879 197,185,109,201, //vpunpckhqdq %xmm1,%xmm8,%xmm1
2880 196,226,125,19,201, //vcvtph2ps %xmm1,%ymm1
2881 197,177,108,211, //vpunpcklqdq %xmm3,%xmm9,%xmm2
2882 196,226,125,19,210, //vcvtph2ps %xmm2,%ymm2
2883 197,177,109,219, //vpunpckhqdq %xmm3,%xmm9,%xmm3
2884 196,226,125,19,219, //vcvtph2ps %xmm3,%ymm3
2885 72,173, //lods %ds:(%rsi),%rax
2886 255,224, //jmpq *%rax
2887 197,251,16,12,248, //vmovsd (%rax,%rdi,8),%xmm1
2888 196,65,57,87,192, //vxorpd %xmm8,%xmm8,%xmm8
2889 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002890 117,6, //jne b91 <_sk_load_f16_hsw+0x81>
Mike Klein894d5612017-03-07 07:59:52 -05002891 197,250,126,201, //vmovq %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002892 235,30, //jmp baf <_sk_load_f16_hsw+0x9f>
Mike Klein894d5612017-03-07 07:59:52 -05002893 197,241,22,76,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm1,%xmm1
2894 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002895 114,18, //jb baf <_sk_load_f16_hsw+0x9f>
Mike Klein894d5612017-03-07 07:59:52 -05002896 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
2897 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002898 117,19, //jne bbc <_sk_load_f16_hsw+0xac>
Mike Klein894d5612017-03-07 07:59:52 -05002899 197,250,126,210, //vmovq %xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002900 235,46, //jmp bdd <_sk_load_f16_hsw+0xcd>
Mike Klein894d5612017-03-07 07:59:52 -05002901 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
2902 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002903 233,117,255,255,255, //jmpq b31 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05002904 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
2905 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002906 114,21, //jb bdd <_sk_load_f16_hsw+0xcd>
Mike Klein894d5612017-03-07 07:59:52 -05002907 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
2908 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002909 117,18, //jne be6 <_sk_load_f16_hsw+0xd6>
Mike Klein894d5612017-03-07 07:59:52 -05002910 197,250,126,219, //vmovq %xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002911 233,84,255,255,255, //jmpq b31 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05002912 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002913 233,75,255,255,255, //jmpq b31 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05002914 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
2915 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002916 15,130,59,255,255,255, //jb b31 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05002917 197,123,16,68,248,48, //vmovsd 0x30(%rax,%rdi,8),%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002918 233,48,255,255,255, //jmpq b31 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05002919};
2920
2921CODE const uint8_t sk_store_f16_hsw[] = {
2922 72,173, //lods %ds:(%rsi),%rax
2923 72,139,0, //mov (%rax),%rax
2924 196,195,125,29,192,4, //vcvtps2ph $0x4,%ymm0,%xmm8
2925 196,195,125,29,201,4, //vcvtps2ph $0x4,%ymm1,%xmm9
2926 196,195,125,29,210,4, //vcvtps2ph $0x4,%ymm2,%xmm10
2927 196,195,125,29,219,4, //vcvtps2ph $0x4,%ymm3,%xmm11
2928 196,65,57,97,225, //vpunpcklwd %xmm9,%xmm8,%xmm12
2929 196,65,57,105,193, //vpunpckhwd %xmm9,%xmm8,%xmm8
2930 196,65,41,97,203, //vpunpcklwd %xmm11,%xmm10,%xmm9
2931 196,65,41,105,235, //vpunpckhwd %xmm11,%xmm10,%xmm13
2932 196,65,25,98,217, //vpunpckldq %xmm9,%xmm12,%xmm11
2933 196,65,25,106,209, //vpunpckhdq %xmm9,%xmm12,%xmm10
2934 196,65,57,98,205, //vpunpckldq %xmm13,%xmm8,%xmm9
2935 196,65,57,106,197, //vpunpckhdq %xmm13,%xmm8,%xmm8
2936 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002937 117,27, //jne c66 <_sk_store_f16_hsw+0x65>
Mike Klein894d5612017-03-07 07:59:52 -05002938 197,120,17,28,248, //vmovups %xmm11,(%rax,%rdi,8)
2939 197,120,17,84,248,16, //vmovups %xmm10,0x10(%rax,%rdi,8)
2940 197,120,17,76,248,32, //vmovups %xmm9,0x20(%rax,%rdi,8)
2941 197,122,127,68,248,48, //vmovdqu %xmm8,0x30(%rax,%rdi,8)
2942 72,173, //lods %ds:(%rsi),%rax
2943 255,224, //jmpq *%rax
2944 197,121,214,28,248, //vmovq %xmm11,(%rax,%rdi,8)
2945 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002946 116,241, //je c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002947 197,121,23,92,248,8, //vmovhpd %xmm11,0x8(%rax,%rdi,8)
2948 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002949 114,229, //jb c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002950 197,121,214,84,248,16, //vmovq %xmm10,0x10(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002951 116,221, //je c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002952 197,121,23,84,248,24, //vmovhpd %xmm10,0x18(%rax,%rdi,8)
2953 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002954 114,209, //jb c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002955 197,121,214,76,248,32, //vmovq %xmm9,0x20(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002956 116,201, //je c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002957 197,121,23,76,248,40, //vmovhpd %xmm9,0x28(%rax,%rdi,8)
2958 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002959 114,189, //jb c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002960 197,121,214,68,248,48, //vmovq %xmm8,0x30(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002961 235,181, //jmp c62 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05002962};
2963
2964CODE const uint8_t sk_store_f32_hsw[] = {
2965 72,173, //lods %ds:(%rsi),%rax
2966 76,139,0, //mov (%rax),%r8
2967 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
2968 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
2969 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
2970 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
2971 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
2972 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
2973 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
2974 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
2975 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
2976 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002977 117,55, //jne d1a <_sk_store_f32_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05002978 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
2979 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
2980 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
2981 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
2982 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
2983 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
2984 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
2985 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
2986 72,173, //lods %ds:(%rsi),%rax
2987 255,224, //jmpq *%rax
2988 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
2989 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002990 116,240, //je d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05002991 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
2992 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002993 114,227, //jb d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05002994 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002995 116,218, //je d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05002996 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
2997 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05002998 114,205, //jb d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05002999 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003000 116,195, //je d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003001 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
3002 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003003 114,181, //jb d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003004 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003005 235,171, //jmp d16 <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05003006};
3007
3008CODE const uint8_t sk_clamp_x_hsw[] = {
3009 72,173, //lods %ds:(%rsi),%rax
3010 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3011 197,188,95,192, //vmaxps %ymm0,%ymm8,%ymm0
3012 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
3013 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
3014 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
3015 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
3016 72,173, //lods %ds:(%rsi),%rax
3017 255,224, //jmpq *%rax
3018};
3019
3020CODE const uint8_t sk_clamp_y_hsw[] = {
3021 72,173, //lods %ds:(%rsi),%rax
3022 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3023 197,188,95,201, //vmaxps %ymm1,%ymm8,%ymm1
3024 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
3025 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
3026 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
3027 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
3028 72,173, //lods %ds:(%rsi),%rax
3029 255,224, //jmpq *%rax
3030};
3031
3032CODE const uint8_t sk_repeat_x_hsw[] = {
3033 72,173, //lods %ds:(%rsi),%rax
3034 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3035 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
3036 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
3037 196,98,61,172,200, //vfnmadd213ps %ymm0,%ymm8,%ymm9
3038 197,253,118,192, //vpcmpeqd %ymm0,%ymm0,%ymm0
3039 197,189,254,192, //vpaddd %ymm0,%ymm8,%ymm0
3040 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
3041 72,173, //lods %ds:(%rsi),%rax
3042 255,224, //jmpq *%rax
3043};
3044
3045CODE const uint8_t sk_repeat_y_hsw[] = {
3046 72,173, //lods %ds:(%rsi),%rax
3047 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3048 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
3049 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
3050 196,98,61,172,201, //vfnmadd213ps %ymm1,%ymm8,%ymm9
3051 197,245,118,201, //vpcmpeqd %ymm1,%ymm1,%ymm1
3052 197,189,254,201, //vpaddd %ymm1,%ymm8,%ymm1
3053 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
3054 72,173, //lods %ds:(%rsi),%rax
3055 255,224, //jmpq *%rax
3056};
3057
3058CODE const uint8_t sk_mirror_x_hsw[] = {
3059 72,173, //lods %ds:(%rsi),%rax
3060 197,122,16,0, //vmovss (%rax),%xmm8
3061 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
3062 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
3063 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
3064 196,226,125,24,192, //vbroadcastss %xmm0,%ymm0
3065 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
3066 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
3067 196,66,125,172,194, //vfnmadd213ps %ymm10,%ymm0,%ymm8
3068 196,193,60,92,193, //vsubps %ymm9,%ymm8,%ymm0
3069 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3070 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
3071 197,188,84,192, //vandps %ymm0,%ymm8,%ymm0
3072 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
3073 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
3074 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
3075 72,173, //lods %ds:(%rsi),%rax
3076 255,224, //jmpq *%rax
3077};
3078
3079CODE const uint8_t sk_mirror_y_hsw[] = {
3080 72,173, //lods %ds:(%rsi),%rax
3081 197,122,16,0, //vmovss (%rax),%xmm8
3082 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
3083 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
3084 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
3085 196,226,125,24,201, //vbroadcastss %xmm1,%ymm1
3086 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
3087 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
3088 196,66,117,172,194, //vfnmadd213ps %ymm10,%ymm1,%ymm8
3089 196,193,60,92,201, //vsubps %ymm9,%ymm8,%ymm1
3090 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3091 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
3092 197,188,84,201, //vandps %ymm1,%ymm8,%ymm1
3093 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
3094 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
3095 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
3096 72,173, //lods %ds:(%rsi),%rax
3097 255,224, //jmpq *%rax
3098};
3099
Mike Kleine9ed07d2017-03-07 12:28:11 -05003100CODE const uint8_t sk_luminance_to_alpha_hsw[] = {
3101 196,98,125,24,130,136,0,0,0, //vbroadcastss 0x88(%rdx),%ymm8
3102 196,226,125,24,154,140,0,0,0, //vbroadcastss 0x8c(%rdx),%ymm3
3103 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
3104 196,98,125,168,193, //vfmadd213ps %ymm1,%ymm0,%ymm8
3105 196,226,125,24,154,144,0,0,0, //vbroadcastss 0x90(%rdx),%ymm3
3106 196,194,109,168,216, //vfmadd213ps %ymm8,%ymm2,%ymm3
3107 72,173, //lods %ds:(%rsi),%rax
3108 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3109 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3110 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3111 255,224, //jmpq *%rax
3112};
3113
Mike Klein894d5612017-03-07 07:59:52 -05003114CODE const uint8_t sk_matrix_2x3_hsw[] = {
3115 72,173, //lods %ds:(%rsi),%rax
3116 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
3117 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
3118 196,98,125,24,64,16, //vbroadcastss 0x10(%rax),%ymm8
3119 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
3120 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
3121 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
3122 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
3123 196,98,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm9
3124 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
3125 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
3126 72,173, //lods %ds:(%rsi),%rax
3127 197,124,41,192, //vmovaps %ymm8,%ymm0
3128 197,124,41,201, //vmovaps %ymm9,%ymm1
3129 255,224, //jmpq *%rax
3130};
3131
3132CODE const uint8_t sk_matrix_3x4_hsw[] = {
3133 72,173, //lods %ds:(%rsi),%rax
3134 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
3135 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
3136 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
3137 196,98,125,24,64,36, //vbroadcastss 0x24(%rax),%ymm8
3138 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
3139 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
3140 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
3141 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
3142 196,98,125,24,88,16, //vbroadcastss 0x10(%rax),%ymm11
3143 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
3144 196,98,125,24,72,40, //vbroadcastss 0x28(%rax),%ymm9
3145 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
3146 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
3147 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
3148 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
3149 196,98,125,24,96,20, //vbroadcastss 0x14(%rax),%ymm12
3150 196,98,125,24,104,32, //vbroadcastss 0x20(%rax),%ymm13
3151 196,98,125,24,80,44, //vbroadcastss 0x2c(%rax),%ymm10
3152 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
3153 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
3154 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
3155 72,173, //lods %ds:(%rsi),%rax
3156 197,124,41,192, //vmovaps %ymm8,%ymm0
3157 197,124,41,201, //vmovaps %ymm9,%ymm1
3158 197,124,41,210, //vmovaps %ymm10,%ymm2
3159 255,224, //jmpq *%rax
3160};
3161
Mike Kleine9ed07d2017-03-07 12:28:11 -05003162CODE const uint8_t sk_matrix_4x5_hsw[] = {
3163 72,173, //lods %ds:(%rsi),%rax
3164 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
3165 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
3166 196,98,125,24,88,32, //vbroadcastss 0x20(%rax),%ymm11
3167 196,98,125,24,96,48, //vbroadcastss 0x30(%rax),%ymm12
3168 196,98,125,24,64,64, //vbroadcastss 0x40(%rax),%ymm8
3169 196,66,101,184,196, //vfmadd231ps %ymm12,%ymm3,%ymm8
3170 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
3171 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
3172 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
3173 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
3174 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
3175 196,98,125,24,96,36, //vbroadcastss 0x24(%rax),%ymm12
3176 196,98,125,24,104,52, //vbroadcastss 0x34(%rax),%ymm13
3177 196,98,125,24,72,68, //vbroadcastss 0x44(%rax),%ymm9
3178 196,66,101,184,205, //vfmadd231ps %ymm13,%ymm3,%ymm9
3179 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
3180 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
3181 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
3182 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
3183 196,98,125,24,96,24, //vbroadcastss 0x18(%rax),%ymm12
3184 196,98,125,24,104,40, //vbroadcastss 0x28(%rax),%ymm13
3185 196,98,125,24,112,56, //vbroadcastss 0x38(%rax),%ymm14
3186 196,98,125,24,80,72, //vbroadcastss 0x48(%rax),%ymm10
3187 196,66,101,184,214, //vfmadd231ps %ymm14,%ymm3,%ymm10
3188 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
3189 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
3190 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
3191 196,98,125,24,96,12, //vbroadcastss 0xc(%rax),%ymm12
3192 196,98,125,24,104,28, //vbroadcastss 0x1c(%rax),%ymm13
3193 196,98,125,24,112,44, //vbroadcastss 0x2c(%rax),%ymm14
3194 196,98,125,24,120,60, //vbroadcastss 0x3c(%rax),%ymm15
3195 196,98,125,24,88,76, //vbroadcastss 0x4c(%rax),%ymm11
3196 196,66,101,184,223, //vfmadd231ps %ymm15,%ymm3,%ymm11
3197 196,66,109,184,222, //vfmadd231ps %ymm14,%ymm2,%ymm11
3198 196,66,117,184,221, //vfmadd231ps %ymm13,%ymm1,%ymm11
3199 196,66,125,184,220, //vfmadd231ps %ymm12,%ymm0,%ymm11
3200 72,173, //lods %ds:(%rsi),%rax
3201 197,124,41,192, //vmovaps %ymm8,%ymm0
3202 197,124,41,201, //vmovaps %ymm9,%ymm1
3203 197,124,41,210, //vmovaps %ymm10,%ymm2
3204 197,124,41,219, //vmovaps %ymm11,%ymm3
3205 255,224, //jmpq *%rax
3206};
3207
Mike Klein894d5612017-03-07 07:59:52 -05003208CODE const uint8_t sk_matrix_perspective_hsw[] = {
3209 72,173, //lods %ds:(%rsi),%rax
3210 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3211 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
3212 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
3213 196,66,117,184,209, //vfmadd231ps %ymm9,%ymm1,%ymm10
3214 196,66,125,184,208, //vfmadd231ps %ymm8,%ymm0,%ymm10
3215 196,98,125,24,64,12, //vbroadcastss 0xc(%rax),%ymm8
3216 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
3217 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
3218 196,66,117,184,217, //vfmadd231ps %ymm9,%ymm1,%ymm11
3219 196,66,125,184,216, //vfmadd231ps %ymm8,%ymm0,%ymm11
3220 196,98,125,24,64,24, //vbroadcastss 0x18(%rax),%ymm8
3221 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
3222 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
3223 196,66,117,184,225, //vfmadd231ps %ymm9,%ymm1,%ymm12
3224 196,66,125,184,224, //vfmadd231ps %ymm8,%ymm0,%ymm12
3225 196,193,124,83,204, //vrcpps %ymm12,%ymm1
3226 197,172,89,193, //vmulps %ymm1,%ymm10,%ymm0
3227 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
3228 72,173, //lods %ds:(%rsi),%rax
3229 255,224, //jmpq *%rax
3230};
3231
3232CODE const uint8_t sk_linear_gradient_2stops_hsw[] = {
3233 72,173, //lods %ds:(%rsi),%rax
3234 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
3235 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3236 196,98,125,184,193, //vfmadd231ps %ymm1,%ymm0,%ymm8
3237 196,226,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm2
3238 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
3239 196,226,125,184,202, //vfmadd231ps %ymm2,%ymm0,%ymm1
3240 196,226,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm3
3241 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
3242 196,226,125,184,211, //vfmadd231ps %ymm3,%ymm0,%ymm2
3243 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
3244 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
3245 196,194,125,184,217, //vfmadd231ps %ymm9,%ymm0,%ymm3
3246 72,173, //lods %ds:(%rsi),%rax
3247 197,124,41,192, //vmovaps %ymm8,%ymm0
3248 255,224, //jmpq *%rax
3249};
3250
3251CODE const uint8_t sk_start_pipeline_avx[] = {
3252 65,87, //push %r15
3253 65,86, //push %r14
3254 65,85, //push %r13
3255 65,84, //push %r12
3256 83, //push %rbx
3257 73,137,205, //mov %rcx,%r13
3258 73,137,214, //mov %rdx,%r14
3259 72,137,251, //mov %rdi,%rbx
3260 72,173, //lods %ds:(%rsi),%rax
3261 73,137,199, //mov %rax,%r15
3262 73,137,244, //mov %rsi,%r12
3263 72,141,67,8, //lea 0x8(%rbx),%rax
3264 76,57,232, //cmp %r13,%rax
3265 118,5, //jbe 28 <_sk_start_pipeline_avx+0x28>
3266 72,137,223, //mov %rbx,%rdi
3267 235,65, //jmp 69 <_sk_start_pipeline_avx+0x69>
3268 185,0,0,0,0, //mov $0x0,%ecx
3269 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3270 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3271 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3272 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3273 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
3274 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
3275 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
3276 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
3277 72,137,223, //mov %rbx,%rdi
3278 76,137,230, //mov %r12,%rsi
3279 76,137,242, //mov %r14,%rdx
3280 65,255,215, //callq *%r15
3281 72,141,123,8, //lea 0x8(%rbx),%rdi
3282 72,131,195,16, //add $0x10,%rbx
3283 76,57,235, //cmp %r13,%rbx
3284 72,137,251, //mov %rdi,%rbx
3285 118,191, //jbe 28 <_sk_start_pipeline_avx+0x28>
3286 76,137,233, //mov %r13,%rcx
3287 72,41,249, //sub %rdi,%rcx
3288 116,41, //je 9a <_sk_start_pipeline_avx+0x9a>
3289 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3290 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3291 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3292 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3293 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
3294 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
3295 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
3296 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
3297 76,137,230, //mov %r12,%rsi
3298 76,137,242, //mov %r14,%rdx
3299 65,255,215, //callq *%r15
3300 76,137,232, //mov %r13,%rax
3301 91, //pop %rbx
3302 65,92, //pop %r12
3303 65,93, //pop %r13
3304 65,94, //pop %r14
3305 65,95, //pop %r15
3306 197,248,119, //vzeroupper
3307 195, //retq
3308};
3309
3310CODE const uint8_t sk_just_return_avx[] = {
3311 195, //retq
3312};
3313
3314CODE const uint8_t sk_seed_shader_avx[] = {
3315 72,173, //lods %ds:(%rsi),%rax
3316 197,249,110,199, //vmovd %edi,%xmm0
3317 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
3318 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
3319 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003320 65,184,0,0,0,63, //mov $0x3f000000,%r8d
3321 196,193,121,110,200, //vmovd %r8d,%xmm1
3322 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
3323 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05003324 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
3325 197,252,88,66,20, //vaddps 0x14(%rdx),%ymm0,%ymm0
3326 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
3327 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
3328 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003329 184,0,0,128,63, //mov $0x3f800000,%eax
3330 197,249,110,208, //vmovd %eax,%xmm2
3331 196,227,121,4,210,0, //vpermilps $0x0,%xmm2,%xmm2
3332 196,227,109,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05003333 72,173, //lods %ds:(%rsi),%rax
3334 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3335 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
3336 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
3337 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
3338 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
3339 255,224, //jmpq *%rax
3340};
3341
3342CODE const uint8_t sk_constant_color_avx[] = {
3343 72,173, //lods %ds:(%rsi),%rax
3344 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
3345 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
3346 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
3347 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
3348 72,173, //lods %ds:(%rsi),%rax
3349 255,224, //jmpq *%rax
3350};
3351
3352CODE const uint8_t sk_clear_avx[] = {
3353 72,173, //lods %ds:(%rsi),%rax
3354 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3355 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3356 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3357 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
3358 255,224, //jmpq *%rax
3359};
3360
3361CODE const uint8_t sk_plus__avx[] = {
3362 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3363 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3364 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3365 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3366 72,173, //lods %ds:(%rsi),%rax
3367 255,224, //jmpq *%rax
3368};
3369
3370CODE const uint8_t sk_srcover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003371 184,0,0,128,63, //mov $0x3f800000,%eax
3372 197,121,110,192, //vmovd %eax,%xmm8
3373 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3374 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003375 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
3376 197,60,89,204, //vmulps %ymm4,%ymm8,%ymm9
3377 197,180,88,192, //vaddps %ymm0,%ymm9,%ymm0
3378 197,60,89,205, //vmulps %ymm5,%ymm8,%ymm9
3379 197,180,88,201, //vaddps %ymm1,%ymm9,%ymm1
3380 197,60,89,206, //vmulps %ymm6,%ymm8,%ymm9
3381 197,180,88,210, //vaddps %ymm2,%ymm9,%ymm2
3382 197,60,89,199, //vmulps %ymm7,%ymm8,%ymm8
3383 197,188,88,219, //vaddps %ymm3,%ymm8,%ymm3
3384 72,173, //lods %ds:(%rsi),%rax
3385 255,224, //jmpq *%rax
3386};
3387
3388CODE const uint8_t sk_dstover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003389 184,0,0,128,63, //mov $0x3f800000,%eax
3390 197,121,110,192, //vmovd %eax,%xmm8
3391 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3392 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003393 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
3394 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3395 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3396 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3397 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3398 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3399 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3400 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
3401 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3402 72,173, //lods %ds:(%rsi),%rax
3403 255,224, //jmpq *%rax
3404};
3405
3406CODE const uint8_t sk_clamp_0_avx[] = {
3407 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3408 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
3409 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
3410 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
3411 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
3412 72,173, //lods %ds:(%rsi),%rax
3413 255,224, //jmpq *%rax
3414};
3415
3416CODE const uint8_t sk_clamp_1_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003417 184,0,0,128,63, //mov $0x3f800000,%eax
3418 197,121,110,192, //vmovd %eax,%xmm8
3419 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3420 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003421 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
3422 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
3423 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
3424 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
3425 72,173, //lods %ds:(%rsi),%rax
3426 255,224, //jmpq *%rax
3427};
3428
3429CODE const uint8_t sk_clamp_a_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003430 184,0,0,128,63, //mov $0x3f800000,%eax
3431 197,121,110,192, //vmovd %eax,%xmm8
3432 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
3433 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05003434 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
3435 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
3436 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
3437 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
3438 72,173, //lods %ds:(%rsi),%rax
3439 255,224, //jmpq *%rax
3440};
3441
3442CODE const uint8_t sk_set_rgb_avx[] = {
3443 72,173, //lods %ds:(%rsi),%rax
3444 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
3445 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
3446 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
3447 72,173, //lods %ds:(%rsi),%rax
3448 255,224, //jmpq *%rax
3449};
3450
3451CODE const uint8_t sk_swap_rb_avx[] = {
3452 197,124,40,192, //vmovaps %ymm0,%ymm8
3453 72,173, //lods %ds:(%rsi),%rax
3454 197,252,40,194, //vmovaps %ymm2,%ymm0
3455 197,124,41,194, //vmovaps %ymm8,%ymm2
3456 255,224, //jmpq *%rax
3457};
3458
3459CODE const uint8_t sk_swap_avx[] = {
3460 197,124,40,195, //vmovaps %ymm3,%ymm8
3461 197,124,40,202, //vmovaps %ymm2,%ymm9
3462 197,124,40,209, //vmovaps %ymm1,%ymm10
3463 197,124,40,216, //vmovaps %ymm0,%ymm11
3464 72,173, //lods %ds:(%rsi),%rax
3465 197,252,40,196, //vmovaps %ymm4,%ymm0
3466 197,252,40,205, //vmovaps %ymm5,%ymm1
3467 197,252,40,214, //vmovaps %ymm6,%ymm2
3468 197,252,40,223, //vmovaps %ymm7,%ymm3
3469 197,124,41,220, //vmovaps %ymm11,%ymm4
3470 197,124,41,213, //vmovaps %ymm10,%ymm5
3471 197,124,41,206, //vmovaps %ymm9,%ymm6
3472 197,124,41,199, //vmovaps %ymm8,%ymm7
3473 255,224, //jmpq *%rax
3474};
3475
3476CODE const uint8_t sk_move_src_dst_avx[] = {
3477 72,173, //lods %ds:(%rsi),%rax
3478 197,252,40,224, //vmovaps %ymm0,%ymm4
3479 197,252,40,233, //vmovaps %ymm1,%ymm5
3480 197,252,40,242, //vmovaps %ymm2,%ymm6
3481 197,252,40,251, //vmovaps %ymm3,%ymm7
3482 255,224, //jmpq *%rax
3483};
3484
3485CODE const uint8_t sk_move_dst_src_avx[] = {
3486 72,173, //lods %ds:(%rsi),%rax
3487 197,252,40,196, //vmovaps %ymm4,%ymm0
3488 197,252,40,205, //vmovaps %ymm5,%ymm1
3489 197,252,40,214, //vmovaps %ymm6,%ymm2
3490 197,252,40,223, //vmovaps %ymm7,%ymm3
3491 255,224, //jmpq *%rax
3492};
3493
3494CODE const uint8_t sk_premul_avx[] = {
3495 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
3496 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
3497 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
3498 72,173, //lods %ds:(%rsi),%rax
3499 255,224, //jmpq *%rax
3500};
3501
3502CODE const uint8_t sk_unpremul_avx[] = {
3503 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3504 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003505 184,0,0,128,63, //mov $0x3f800000,%eax
3506 197,121,110,208, //vmovd %eax,%xmm10
3507 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
3508 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05003509 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
3510 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
3511 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3512 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3513 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3514 72,173, //lods %ds:(%rsi),%rax
3515 255,224, //jmpq *%rax
3516};
3517
3518CODE const uint8_t sk_from_srgb_avx[] = {
3519 196,98,125,24,66,64, //vbroadcastss 0x40(%rdx),%ymm8
3520 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
3521 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
3522 196,98,125,24,90,60, //vbroadcastss 0x3c(%rdx),%ymm11
3523 196,98,125,24,98,56, //vbroadcastss 0x38(%rdx),%ymm12
3524 197,36,89,232, //vmulps %ymm0,%ymm11,%ymm13
3525 196,65,20,88,236, //vaddps %ymm12,%ymm13,%ymm13
3526 196,98,125,24,114,52, //vbroadcastss 0x34(%rdx),%ymm14
3527 196,65,44,89,213, //vmulps %ymm13,%ymm10,%ymm10
3528 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
3529 196,98,125,24,106,68, //vbroadcastss 0x44(%rdx),%ymm13
3530 196,193,124,194,197,1, //vcmpltps %ymm13,%ymm0,%ymm0
3531 196,195,45,74,193,0, //vblendvps %ymm0,%ymm9,%ymm10,%ymm0
3532 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
3533 197,116,89,209, //vmulps %ymm1,%ymm1,%ymm10
3534 197,36,89,249, //vmulps %ymm1,%ymm11,%ymm15
3535 196,65,4,88,252, //vaddps %ymm12,%ymm15,%ymm15
3536 196,65,44,89,215, //vmulps %ymm15,%ymm10,%ymm10
3537 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
3538 196,193,116,194,205,1, //vcmpltps %ymm13,%ymm1,%ymm1
3539 196,195,45,74,201,16, //vblendvps %ymm1,%ymm9,%ymm10,%ymm1
3540 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
3541 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
3542 197,36,89,210, //vmulps %ymm2,%ymm11,%ymm10
3543 196,65,44,88,212, //vaddps %ymm12,%ymm10,%ymm10
3544 196,65,52,89,202, //vmulps %ymm10,%ymm9,%ymm9
3545 196,65,12,88,201, //vaddps %ymm9,%ymm14,%ymm9
3546 196,193,108,194,213,1, //vcmpltps %ymm13,%ymm2,%ymm2
3547 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
3548 72,173, //lods %ds:(%rsi),%rax
3549 255,224, //jmpq *%rax
3550};
3551
3552CODE const uint8_t sk_to_srgb_avx[] = {
3553 197,124,82,192, //vrsqrtps %ymm0,%ymm8
3554 196,65,124,83,200, //vrcpps %ymm8,%ymm9
3555 196,65,124,82,208, //vrsqrtps %ymm8,%ymm10
3556 196,98,125,24,66,72, //vbroadcastss 0x48(%rdx),%ymm8
3557 197,60,89,216, //vmulps %ymm0,%ymm8,%ymm11
3558 196,98,125,24,34, //vbroadcastss (%rdx),%ymm12
3559 196,98,125,24,106,76, //vbroadcastss 0x4c(%rdx),%ymm13
3560 196,98,125,24,114,80, //vbroadcastss 0x50(%rdx),%ymm14
3561 196,98,125,24,122,84, //vbroadcastss 0x54(%rdx),%ymm15
3562 196,65,52,89,206, //vmulps %ymm14,%ymm9,%ymm9
3563 196,65,52,88,207, //vaddps %ymm15,%ymm9,%ymm9
3564 196,65,44,89,213, //vmulps %ymm13,%ymm10,%ymm10
3565 196,65,44,88,201, //vaddps %ymm9,%ymm10,%ymm9
3566 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
3567 196,98,125,24,82,88, //vbroadcastss 0x58(%rdx),%ymm10
3568 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
3569 196,195,53,74,195,0, //vblendvps %ymm0,%ymm11,%ymm9,%ymm0
3570 197,124,82,201, //vrsqrtps %ymm1,%ymm9
3571 196,65,124,83,217, //vrcpps %ymm9,%ymm11
3572 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
3573 196,65,12,89,219, //vmulps %ymm11,%ymm14,%ymm11
3574 196,65,4,88,219, //vaddps %ymm11,%ymm15,%ymm11
3575 196,65,20,89,201, //vmulps %ymm9,%ymm13,%ymm9
3576 196,65,52,88,203, //vaddps %ymm11,%ymm9,%ymm9
3577 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
3578 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
3579 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
3580 196,195,53,74,203,16, //vblendvps %ymm1,%ymm11,%ymm9,%ymm1
3581 197,124,82,202, //vrsqrtps %ymm2,%ymm9
3582 196,65,124,83,217, //vrcpps %ymm9,%ymm11
3583 196,65,12,89,219, //vmulps %ymm11,%ymm14,%ymm11
3584 196,65,4,88,219, //vaddps %ymm11,%ymm15,%ymm11
3585 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
3586 196,65,20,89,201, //vmulps %ymm9,%ymm13,%ymm9
3587 196,65,52,88,203, //vaddps %ymm11,%ymm9,%ymm9
3588 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
3589 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
3590 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
3591 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
3592 72,173, //lods %ds:(%rsi),%rax
3593 255,224, //jmpq *%rax
3594};
3595
3596CODE const uint8_t sk_scale_1_float_avx[] = {
3597 72,173, //lods %ds:(%rsi),%rax
3598 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3599 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3600 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3601 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3602 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
3603 72,173, //lods %ds:(%rsi),%rax
3604 255,224, //jmpq *%rax
3605};
3606
3607CODE const uint8_t sk_scale_u8_avx[] = {
3608 73,137,200, //mov %rcx,%r8
3609 72,173, //lods %ds:(%rsi),%rax
3610 72,139,0, //mov (%rax),%rax
3611 72,1,248, //add %rdi,%rax
3612 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003613 117,80, //jne 4f8 <_sk_scale_u8_avx+0x60>
Mike Klein894d5612017-03-07 07:59:52 -05003614 197,123,16,0, //vmovsd (%rax),%xmm8
3615 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
3616 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
3617 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
3618 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
3619 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003620 184,129,128,128,59, //mov $0x3b808081,%eax
3621 197,121,110,200, //vmovd %eax,%xmm9
3622 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
3623 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05003624 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
3625 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
3626 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
3627 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
3628 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
3629 72,173, //lods %ds:(%rsi),%rax
3630 76,137,193, //mov %r8,%rcx
3631 255,224, //jmpq *%rax
3632 49,201, //xor %ecx,%ecx
3633 77,137,194, //mov %r8,%r10
3634 69,49,201, //xor %r9d,%r9d
3635 68,15,182,24, //movzbl (%rax),%r11d
3636 72,255,192, //inc %rax
3637 73,211,227, //shl %cl,%r11
3638 77,9,217, //or %r11,%r9
3639 72,131,193,8, //add $0x8,%rcx
3640 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003641 117,234, //jne 500 <_sk_scale_u8_avx+0x68>
Mike Klein894d5612017-03-07 07:59:52 -05003642 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003643 235,143, //jmp 4ac <_sk_scale_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05003644};
3645
3646CODE const uint8_t sk_lerp_1_float_avx[] = {
3647 72,173, //lods %ds:(%rsi),%rax
3648 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
3649 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
3650 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
3651 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3652 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
3653 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
3654 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3655 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
3656 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
3657 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3658 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
3659 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
3660 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3661 72,173, //lods %ds:(%rsi),%rax
3662 255,224, //jmpq *%rax
3663};
3664
3665CODE const uint8_t sk_lerp_u8_avx[] = {
3666 73,137,200, //mov %rcx,%r8
3667 72,173, //lods %ds:(%rsi),%rax
3668 72,139,0, //mov (%rax),%rax
3669 72,1,248, //add %rdi,%rax
3670 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003671 117,116, //jne 5e0 <_sk_lerp_u8_avx+0x84>
Mike Klein894d5612017-03-07 07:59:52 -05003672 197,123,16,0, //vmovsd (%rax),%xmm8
3673 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
3674 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
3675 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
3676 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
3677 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003678 184,129,128,128,59, //mov $0x3b808081,%eax
3679 197,121,110,200, //vmovd %eax,%xmm9
3680 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
3681 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05003682 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
3683 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
3684 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
3685 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3686 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
3687 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
3688 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3689 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
3690 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
3691 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
3692 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
3693 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
3694 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
3695 72,173, //lods %ds:(%rsi),%rax
3696 76,137,193, //mov %r8,%rcx
3697 255,224, //jmpq *%rax
3698 49,201, //xor %ecx,%ecx
3699 77,137,194, //mov %r8,%r10
3700 69,49,201, //xor %r9d,%r9d
3701 68,15,182,24, //movzbl (%rax),%r11d
3702 72,255,192, //inc %rax
3703 73,211,227, //shl %cl,%r11
3704 77,9,217, //or %r11,%r9
3705 72,131,193,8, //add $0x8,%rcx
3706 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003707 117,234, //jne 5e8 <_sk_lerp_u8_avx+0x8c>
Mike Klein894d5612017-03-07 07:59:52 -05003708 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003709 233,104,255,255,255, //jmpq 570 <_sk_lerp_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05003710};
3711
3712CODE const uint8_t sk_lerp_565_avx[] = {
3713 72,173, //lods %ds:(%rsi),%rax
3714 76,139,16, //mov (%rax),%r10
3715 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003716 15,133,164,0,0,0, //jne 6ba <_sk_lerp_565_avx+0xb2>
Mike Klein894d5612017-03-07 07:59:52 -05003717 196,65,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm8
3718 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
3719 197,185,105,219, //vpunpckhwd %xmm3,%xmm8,%xmm3
3720 196,66,121,51,192, //vpmovzxwd %xmm8,%xmm8
3721 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
3722 196,98,125,24,66,104, //vbroadcastss 0x68(%rdx),%ymm8
3723 197,60,84,195, //vandps %ymm3,%ymm8,%ymm8
3724 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
3725 196,98,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm9
3726 196,65,52,89,192, //vmulps %ymm8,%ymm9,%ymm8
3727 196,98,125,24,74,108, //vbroadcastss 0x6c(%rdx),%ymm9
3728 197,52,84,203, //vandps %ymm3,%ymm9,%ymm9
3729 196,65,124,91,201, //vcvtdq2ps %ymm9,%ymm9
3730 196,98,125,24,82,120, //vbroadcastss 0x78(%rdx),%ymm10
3731 196,65,44,89,201, //vmulps %ymm9,%ymm10,%ymm9
3732 196,98,125,24,82,112, //vbroadcastss 0x70(%rdx),%ymm10
3733 197,172,84,219, //vandps %ymm3,%ymm10,%ymm3
3734 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
3735 196,98,125,24,82,124, //vbroadcastss 0x7c(%rdx),%ymm10
3736 197,172,89,219, //vmulps %ymm3,%ymm10,%ymm3
3737 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
3738 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
3739 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
3740 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
3741 196,193,116,89,201, //vmulps %ymm9,%ymm1,%ymm1
3742 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
3743 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
3744 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
3745 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003746 184,0,0,128,63, //mov $0x3f800000,%eax
3747 197,249,110,216, //vmovd %eax,%xmm3
3748 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
3749 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05003750 72,173, //lods %ds:(%rsi),%rax
3751 255,224, //jmpq *%rax
3752 65,137,200, //mov %ecx,%r8d
3753 65,128,224,7, //and $0x7,%r8b
3754 196,65,57,239,192, //vpxor %xmm8,%xmm8,%xmm8
3755 65,254,200, //dec %r8b
3756 69,15,182,192, //movzbl %r8b,%r8d
3757 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003758 15,135,69,255,255,255, //ja 61c <_sk_lerp_565_avx+0x14>
3759 76,141,13,74,0,0,0, //lea 0x4a(%rip),%r9 # 728 <_sk_lerp_565_avx+0x120>
Mike Klein894d5612017-03-07 07:59:52 -05003760 75,99,4,129, //movslq (%r9,%r8,4),%rax
3761 76,1,200, //add %r9,%rax
3762 255,224, //jmpq *%rax
3763 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
3764 196,65,97,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm8
3765 196,65,57,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm8,%xmm8
3766 196,65,57,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm8,%xmm8
3767 196,65,57,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm8,%xmm8
3768 196,65,57,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
3769 196,65,57,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
3770 196,65,57,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003771 233,245,254,255,255, //jmpq 61c <_sk_lerp_565_avx+0x14>
3772 144, //nop
3773 243,255, //repz (bad)
3774 255, //(bad)
3775 255, //(bad)
3776 235,255, //jmp 72d <_sk_lerp_565_avx+0x125>
3777 255, //(bad)
3778 255,227, //jmpq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05003779 255, //(bad)
3780 255, //(bad)
3781 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003782 219,255, //(bad)
3783 255, //(bad)
3784 255,211, //callq *%rbx
Mike Klein894d5612017-03-07 07:59:52 -05003785 255, //(bad)
3786 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003787 255,203, //dec %ebx
Mike Klein894d5612017-03-07 07:59:52 -05003788 255, //(bad)
3789 255, //(bad)
3790 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003791 191, //.byte 0xbf
Mike Klein894d5612017-03-07 07:59:52 -05003792 255, //(bad)
3793 255, //(bad)
3794 255, //.byte 0xff
3795};
3796
3797CODE const uint8_t sk_load_tables_avx[] = {
3798 85, //push %rbp
3799 65,87, //push %r15
3800 65,86, //push %r14
3801 65,85, //push %r13
3802 65,84, //push %r12
3803 83, //push %rbx
3804 72,173, //lods %ds:(%rsi),%rax
3805 76,139,0, //mov (%rax),%r8
3806 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003807 15,133,18,2,0,0, //jne 96e <_sk_load_tables_avx+0x22a>
Mike Klein894d5612017-03-07 07:59:52 -05003808 196,65,124,16,4,184, //vmovups (%r8,%rdi,4),%ymm8
3809 196,98,125,24,74,16, //vbroadcastss 0x10(%rdx),%ymm9
3810 196,193,52,84,192, //vandps %ymm8,%ymm9,%ymm0
3811 196,193,249,126,193, //vmovq %xmm0,%r9
3812 69,137,203, //mov %r9d,%r11d
3813 196,195,249,22,194,1, //vpextrq $0x1,%xmm0,%r10
3814 69,137,214, //mov %r10d,%r14d
3815 73,193,234,32, //shr $0x20,%r10
3816 73,193,233,32, //shr $0x20,%r9
3817 196,227,125,25,192,1, //vextractf128 $0x1,%ymm0,%xmm0
3818 196,193,249,126,196, //vmovq %xmm0,%r12
3819 69,137,231, //mov %r12d,%r15d
3820 196,227,249,22,195,1, //vpextrq $0x1,%xmm0,%rbx
3821 65,137,221, //mov %ebx,%r13d
3822 72,193,235,32, //shr $0x20,%rbx
3823 73,193,236,32, //shr $0x20,%r12
3824 72,139,104,8, //mov 0x8(%rax),%rbp
3825 76,139,64,16, //mov 0x10(%rax),%r8
3826 196,161,122,16,68,189,0, //vmovss 0x0(%rbp,%r15,4),%xmm0
3827 196,163,121,33,68,165,0,16, //vinsertps $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
3828 196,163,121,33,68,173,0,32, //vinsertps $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
3829 197,250,16,76,157,0, //vmovss 0x0(%rbp,%rbx,4),%xmm1
3830 196,227,121,33,193,48, //vinsertps $0x30,%xmm1,%xmm0,%xmm0
3831 196,161,122,16,76,157,0, //vmovss 0x0(%rbp,%r11,4),%xmm1
3832 196,163,113,33,76,141,0,16, //vinsertps $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
3833 196,163,113,33,76,181,0,32, //vinsertps $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
3834 196,161,122,16,92,149,0, //vmovss 0x0(%rbp,%r10,4),%xmm3
3835 196,227,113,33,203,48, //vinsertps $0x30,%xmm3,%xmm1,%xmm1
3836 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
3837 196,193,113,114,208,8, //vpsrld $0x8,%xmm8,%xmm1
3838 196,67,125,25,194,1, //vextractf128 $0x1,%ymm8,%xmm10
3839 196,193,105,114,210,8, //vpsrld $0x8,%xmm10,%xmm2
3840 196,227,117,24,202,1, //vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
3841 197,180,84,201, //vandps %ymm1,%ymm9,%ymm1
3842 196,193,249,126,201, //vmovq %xmm1,%r9
3843 69,137,203, //mov %r9d,%r11d
3844 196,195,249,22,202,1, //vpextrq $0x1,%xmm1,%r10
3845 69,137,214, //mov %r10d,%r14d
3846 73,193,234,32, //shr $0x20,%r10
3847 73,193,233,32, //shr $0x20,%r9
3848 196,227,125,25,201,1, //vextractf128 $0x1,%ymm1,%xmm1
3849 196,225,249,126,205, //vmovq %xmm1,%rbp
3850 65,137,239, //mov %ebp,%r15d
3851 196,227,249,22,203,1, //vpextrq $0x1,%xmm1,%rbx
3852 65,137,220, //mov %ebx,%r12d
3853 72,193,235,32, //shr $0x20,%rbx
3854 72,193,237,32, //shr $0x20,%rbp
3855 196,129,122,16,12,184, //vmovss (%r8,%r15,4),%xmm1
3856 196,195,113,33,12,168,16, //vinsertps $0x10,(%r8,%rbp,4),%xmm1,%xmm1
3857 196,129,122,16,20,160, //vmovss (%r8,%r12,4),%xmm2
3858 196,227,113,33,202,32, //vinsertps $0x20,%xmm2,%xmm1,%xmm1
3859 196,193,122,16,20,152, //vmovss (%r8,%rbx,4),%xmm2
3860 196,227,113,33,202,48, //vinsertps $0x30,%xmm2,%xmm1,%xmm1
3861 196,129,122,16,20,152, //vmovss (%r8,%r11,4),%xmm2
3862 196,131,105,33,20,136,16, //vinsertps $0x10,(%r8,%r9,4),%xmm2,%xmm2
3863 196,129,122,16,28,176, //vmovss (%r8,%r14,4),%xmm3
3864 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
3865 196,129,122,16,28,144, //vmovss (%r8,%r10,4),%xmm3
3866 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
3867 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
3868 72,139,64,24, //mov 0x18(%rax),%rax
3869 196,193,105,114,208,16, //vpsrld $0x10,%xmm8,%xmm2
3870 196,193,97,114,210,16, //vpsrld $0x10,%xmm10,%xmm3
3871 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
3872 197,180,84,210, //vandps %ymm2,%ymm9,%ymm2
3873 196,193,249,126,208, //vmovq %xmm2,%r8
3874 69,137,194, //mov %r8d,%r10d
3875 196,195,249,22,209,1, //vpextrq $0x1,%xmm2,%r9
3876 69,137,203, //mov %r9d,%r11d
3877 73,193,233,32, //shr $0x20,%r9
3878 73,193,232,32, //shr $0x20,%r8
3879 196,227,125,25,210,1, //vextractf128 $0x1,%ymm2,%xmm2
3880 196,225,249,126,213, //vmovq %xmm2,%rbp
3881 65,137,238, //mov %ebp,%r14d
3882 196,227,249,22,211,1, //vpextrq $0x1,%xmm2,%rbx
3883 65,137,223, //mov %ebx,%r15d
3884 72,193,235,32, //shr $0x20,%rbx
3885 72,193,237,32, //shr $0x20,%rbp
3886 196,161,122,16,20,176, //vmovss (%rax,%r14,4),%xmm2
3887 196,227,105,33,20,168,16, //vinsertps $0x10,(%rax,%rbp,4),%xmm2,%xmm2
3888 196,161,122,16,28,184, //vmovss (%rax,%r15,4),%xmm3
3889 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
3890 197,250,16,28,152, //vmovss (%rax,%rbx,4),%xmm3
3891 196,99,105,33,203,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm9
3892 196,161,122,16,28,144, //vmovss (%rax,%r10,4),%xmm3
3893 196,163,97,33,28,128,16, //vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
3894 196,161,122,16,20,152, //vmovss (%rax,%r11,4),%xmm2
3895 196,227,97,33,210,32, //vinsertps $0x20,%xmm2,%xmm3,%xmm2
3896 196,161,122,16,28,136, //vmovss (%rax,%r9,4),%xmm3
3897 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
3898 196,195,109,24,209,1, //vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
3899 196,193,57,114,208,24, //vpsrld $0x18,%xmm8,%xmm8
3900 196,193,97,114,210,24, //vpsrld $0x18,%xmm10,%xmm3
3901 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
3902 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
3903 196,98,125,24,66,12, //vbroadcastss 0xc(%rdx),%ymm8
3904 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
3905 72,173, //lods %ds:(%rsi),%rax
3906 91, //pop %rbx
3907 65,92, //pop %r12
3908 65,93, //pop %r13
3909 65,94, //pop %r14
3910 65,95, //pop %r15
3911 93, //pop %rbp
3912 255,224, //jmpq *%rax
3913 65,137,201, //mov %ecx,%r9d
3914 65,128,225,7, //and $0x7,%r9b
3915 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
3916 65,254,201, //dec %r9b
3917 69,15,182,201, //movzbl %r9b,%r9d
3918 65,128,249,6, //cmp $0x6,%r9b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003919 15,135,215,253,255,255, //ja 762 <_sk_load_tables_avx+0x1e>
3920 76,141,21,138,0,0,0, //lea 0x8a(%rip),%r10 # a1c <_sk_load_tables_avx+0x2d8>
Mike Klein894d5612017-03-07 07:59:52 -05003921 79,99,12,138, //movslq (%r10,%r9,4),%r9
3922 77,1,209, //add %r10,%r9
3923 65,255,225, //jmpq *%r9
3924 196,193,121,110,68,184,24, //vmovd 0x18(%r8,%rdi,4),%xmm0
3925 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
3926 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
3927 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3928 196,99,117,12,192,64, //vblendps $0x40,%ymm0,%ymm1,%ymm8
3929 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
3930 196,195,121,34,68,184,20,1, //vpinsrd $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
3931 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
3932 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
3933 196,195,121,34,68,184,16,0, //vpinsrd $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
3934 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
3935 196,195,57,34,68,184,12,3, //vpinsrd $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
3936 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
3937 196,195,57,34,68,184,8,2, //vpinsrd $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
3938 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
3939 196,195,57,34,68,184,4,1, //vpinsrd $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
3940 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
3941 196,195,57,34,4,184,0, //vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
3942 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003943 233,70,253,255,255, //jmpq 762 <_sk_load_tables_avx+0x1e>
Mike Klein894d5612017-03-07 07:59:52 -05003944 238, //out %al,(%dx)
3945 255, //(bad)
3946 255, //(bad)
3947 255,224, //jmpq *%rax
3948 255, //(bad)
3949 255, //(bad)
3950 255,210, //callq *%rdx
3951 255, //(bad)
3952 255, //(bad)
3953 255,196, //inc %esp
3954 255, //(bad)
3955 255, //(bad)
3956 255,176,255,255,255,156, //pushq -0x63000001(%rax)
3957 255, //(bad)
3958 255, //(bad)
3959 255, //.byte 0xff
3960 128,255,255, //cmp $0xff,%bh
3961 255, //.byte 0xff
3962};
3963
3964CODE const uint8_t sk_load_a8_avx[] = {
3965 73,137,200, //mov %rcx,%r8
3966 72,173, //lods %ds:(%rsi),%rax
3967 72,139,0, //mov (%rax),%rax
3968 72,1,248, //add %rdi,%rax
3969 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003970 117,59, //jne a83 <_sk_load_a8_avx+0x4b>
Mike Klein894d5612017-03-07 07:59:52 -05003971 197,251,16,0, //vmovsd (%rax),%xmm0
3972 196,226,121,49,200, //vpmovzxbd %xmm0,%xmm1
3973 196,227,121,4,192,229, //vpermilps $0xe5,%xmm0,%xmm0
3974 196,226,121,49,192, //vpmovzxbd %xmm0,%xmm0
3975 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
3976 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
3977 196,226,125,24,74,12, //vbroadcastss 0xc(%rdx),%ymm1
3978 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
3979 72,173, //lods %ds:(%rsi),%rax
3980 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
3981 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
3982 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
3983 76,137,193, //mov %r8,%rcx
3984 255,224, //jmpq *%rax
3985 49,201, //xor %ecx,%ecx
3986 77,137,194, //mov %r8,%r10
3987 69,49,201, //xor %r9d,%r9d
3988 68,15,182,24, //movzbl (%rax),%r11d
3989 72,255,192, //inc %rax
3990 73,211,227, //shl %cl,%r11
3991 77,9,217, //or %r11,%r9
3992 72,131,193,8, //add $0x8,%rcx
3993 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003994 117,234, //jne a8b <_sk_load_a8_avx+0x53>
Mike Klein894d5612017-03-07 07:59:52 -05003995 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05003996 235,164, //jmp a4c <_sk_load_a8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05003997};
3998
3999CODE const uint8_t sk_store_a8_avx[] = {
4000 72,173, //lods %ds:(%rsi),%rax
4001 76,139,8, //mov (%rax),%r9
4002 196,98,125,24,66,8, //vbroadcastss 0x8(%rdx),%ymm8
4003 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
4004 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
4005 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4006 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
4007 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
4008 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004009 117,10, //jne adb <_sk_store_a8_avx+0x33>
Mike Klein894d5612017-03-07 07:59:52 -05004010 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
4011 72,173, //lods %ds:(%rsi),%rax
4012 255,224, //jmpq *%rax
4013 137,200, //mov %ecx,%eax
4014 36,7, //and $0x7,%al
4015 254,200, //dec %al
4016 68,15,182,192, //movzbl %al,%r8d
4017 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004018 119,236, //ja ad7 <_sk_store_a8_avx+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05004019 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004020 76,141,21,69,0,0,0, //lea 0x45(%rip),%r10 # b3c <_sk_store_a8_avx+0x94>
Mike Klein894d5612017-03-07 07:59:52 -05004021 75,99,4,130, //movslq (%r10,%r8,4),%rax
4022 76,1,208, //add %r10,%rax
4023 255,224, //jmpq *%rax
4024 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
4025 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
4026 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
4027 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
4028 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
4029 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
4030 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004031 235,158, //jmp ad7 <_sk_store_a8_avx+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05004032 15,31,0, //nopl (%rax)
4033 244, //hlt
4034 255, //(bad)
4035 255, //(bad)
4036 255, //(bad)
4037 236, //in (%dx),%al
4038 255, //(bad)
4039 255, //(bad)
4040 255,228, //jmpq *%rsp
4041 255, //(bad)
4042 255, //(bad)
4043 255, //(bad)
4044 220,255, //fdivr %st,%st(7)
4045 255, //(bad)
4046 255,212, //callq *%rsp
4047 255, //(bad)
4048 255, //(bad)
4049 255,204, //dec %esp
4050 255, //(bad)
4051 255, //(bad)
4052 255,196, //inc %esp
4053 255, //(bad)
4054 255, //(bad)
4055 255, //.byte 0xff
4056};
4057
4058CODE const uint8_t sk_load_565_avx[] = {
4059 72,173, //lods %ds:(%rsi),%rax
4060 76,139,16, //mov (%rax),%r10
4061 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004062 117,106, //jne bcc <_sk_load_565_avx+0x74>
Mike Klein894d5612017-03-07 07:59:52 -05004063 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
4064 197,241,239,201, //vpxor %xmm1,%xmm1,%xmm1
4065 197,249,105,201, //vpunpckhwd %xmm1,%xmm0,%xmm1
4066 196,226,121,51,192, //vpmovzxwd %xmm0,%xmm0
4067 196,227,125,24,209,1, //vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
4068 196,226,125,24,66,104, //vbroadcastss 0x68(%rdx),%ymm0
4069 197,252,84,194, //vandps %ymm2,%ymm0,%ymm0
4070 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
4071 196,226,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm1
4072 197,244,89,192, //vmulps %ymm0,%ymm1,%ymm0
4073 196,226,125,24,74,108, //vbroadcastss 0x6c(%rdx),%ymm1
4074 197,244,84,202, //vandps %ymm2,%ymm1,%ymm1
4075 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
4076 196,226,125,24,90,120, //vbroadcastss 0x78(%rdx),%ymm3
4077 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
4078 196,226,125,24,90,112, //vbroadcastss 0x70(%rdx),%ymm3
4079 197,228,84,210, //vandps %ymm2,%ymm3,%ymm2
4080 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
4081 196,226,125,24,90,124, //vbroadcastss 0x7c(%rdx),%ymm3
4082 197,228,89,210, //vmulps %ymm2,%ymm3,%ymm2
4083 196,226,125,24,26, //vbroadcastss (%rdx),%ymm3
4084 72,173, //lods %ds:(%rsi),%rax
4085 255,224, //jmpq *%rax
4086 65,137,200, //mov %ecx,%r8d
4087 65,128,224,7, //and $0x7,%r8b
4088 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
4089 65,254,200, //dec %r8b
4090 69,15,182,192, //movzbl %r8b,%r8d
4091 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004092 119,132, //ja b68 <_sk_load_565_avx+0x10>
4093 76,141,13,73,0,0,0, //lea 0x49(%rip),%r9 # c34 <_sk_load_565_avx+0xdc>
Mike Klein894d5612017-03-07 07:59:52 -05004094 75,99,4,129, //movslq (%r9,%r8,4),%rax
4095 76,1,200, //add %r9,%rax
4096 255,224, //jmpq *%rax
4097 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
4098 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
4099 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
4100 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
4101 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
4102 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
4103 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
4104 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004105 233,52,255,255,255, //jmpq b68 <_sk_load_565_avx+0x10>
Mike Klein894d5612017-03-07 07:59:52 -05004106 244, //hlt
4107 255, //(bad)
4108 255, //(bad)
4109 255, //(bad)
4110 236, //in (%dx),%al
4111 255, //(bad)
4112 255, //(bad)
4113 255,228, //jmpq *%rsp
4114 255, //(bad)
4115 255, //(bad)
4116 255, //(bad)
4117 220,255, //fdivr %st,%st(7)
4118 255, //(bad)
4119 255,212, //callq *%rsp
4120 255, //(bad)
4121 255, //(bad)
4122 255,204, //dec %esp
4123 255, //(bad)
4124 255, //(bad)
4125 255,192, //inc %eax
4126 255, //(bad)
4127 255, //(bad)
4128 255, //.byte 0xff
4129};
4130
4131CODE const uint8_t sk_store_565_avx[] = {
4132 72,173, //lods %ds:(%rsi),%rax
4133 76,139,8, //mov (%rax),%r9
4134 196,98,125,24,130,128,0,0,0, //vbroadcastss 0x80(%rdx),%ymm8
4135 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
4136 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
4137 196,193,41,114,241,11, //vpslld $0xb,%xmm9,%xmm10
4138 196,67,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm9
4139 196,193,49,114,241,11, //vpslld $0xb,%xmm9,%xmm9
4140 196,67,45,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
4141 196,98,125,24,146,132,0,0,0, //vbroadcastss 0x84(%rdx),%ymm10
4142 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
4143 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
4144 196,193,33,114,242,5, //vpslld $0x5,%xmm10,%xmm11
4145 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
4146 196,193,41,114,242,5, //vpslld $0x5,%xmm10,%xmm10
4147 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
4148 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
4149 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
4150 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
4151 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
4152 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4153 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
4154 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004155 117,10, //jne cd6 <_sk_store_565_avx+0x86>
Mike Klein894d5612017-03-07 07:59:52 -05004156 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
4157 72,173, //lods %ds:(%rsi),%rax
4158 255,224, //jmpq *%rax
4159 137,200, //mov %ecx,%eax
4160 36,7, //and $0x7,%al
4161 254,200, //dec %al
4162 68,15,182,192, //movzbl %al,%r8d
4163 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004164 119,236, //ja cd2 <_sk_store_565_avx+0x82>
4165 76,141,21,71,0,0,0, //lea 0x47(%rip),%r10 # d34 <_sk_store_565_avx+0xe4>
Mike Klein894d5612017-03-07 07:59:52 -05004166 75,99,4,130, //movslq (%r10,%r8,4),%rax
4167 76,1,208, //add %r10,%rax
4168 255,224, //jmpq *%rax
4169 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
4170 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
4171 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
4172 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
4173 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
4174 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
4175 197,121,126,192, //vmovd %xmm8,%eax
4176 102,65,137,4,121, //mov %ax,(%r9,%rdi,2)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004177 235,161, //jmp cd2 <_sk_store_565_avx+0x82>
Mike Klein894d5612017-03-07 07:59:52 -05004178 15,31,0, //nopl (%rax)
4179 242,255, //repnz (bad)
4180 255, //(bad)
4181 255, //(bad)
4182 234, //(bad)
4183 255, //(bad)
4184 255, //(bad)
4185 255,226, //jmpq *%rdx
4186 255, //(bad)
4187 255, //(bad)
4188 255, //(bad)
4189 218,255, //(bad)
4190 255, //(bad)
4191 255,210, //callq *%rdx
4192 255, //(bad)
4193 255, //(bad)
4194 255,202, //dec %edx
4195 255, //(bad)
4196 255, //(bad)
4197 255,194, //inc %edx
4198 255, //(bad)
4199 255, //(bad)
4200 255, //.byte 0xff
4201};
4202
4203CODE const uint8_t sk_load_8888_avx[] = {
4204 72,173, //lods %ds:(%rsi),%rax
4205 76,139,16, //mov (%rax),%r10
4206 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004207 15,133,157,0,0,0, //jne dfb <_sk_load_8888_avx+0xab>
Mike Klein894d5612017-03-07 07:59:52 -05004208 196,65,124,16,12,186, //vmovups (%r10,%rdi,4),%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004209 184,255,0,0,0, //mov $0xff,%eax
4210 197,249,110,192, //vmovd %eax,%xmm0
4211 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
4212 196,99,125,24,216,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm11
Mike Klein894d5612017-03-07 07:59:52 -05004213 196,193,36,84,193, //vandps %ymm9,%ymm11,%ymm0
4214 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004215 184,129,128,128,59, //mov $0x3b808081,%eax
4216 197,249,110,200, //vmovd %eax,%xmm1
4217 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4218 196,99,117,24,193,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm8
4219 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05004220 196,193,41,114,209,8, //vpsrld $0x8,%xmm9,%xmm10
4221 196,99,125,25,203,1, //vextractf128 $0x1,%ymm9,%xmm3
4222 197,241,114,211,8, //vpsrld $0x8,%xmm3,%xmm1
4223 196,227,45,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm10,%ymm1
4224 197,164,84,201, //vandps %ymm1,%ymm11,%ymm1
4225 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004226 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05004227 196,193,41,114,209,16, //vpsrld $0x10,%xmm9,%xmm10
4228 197,233,114,211,16, //vpsrld $0x10,%xmm3,%xmm2
4229 196,227,45,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm10,%ymm2
4230 197,164,84,210, //vandps %ymm2,%ymm11,%ymm2
4231 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004232 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05004233 196,193,49,114,209,24, //vpsrld $0x18,%xmm9,%xmm9
4234 197,225,114,211,24, //vpsrld $0x18,%xmm3,%xmm3
4235 196,227,53,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
4236 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
4237 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
4238 72,173, //lods %ds:(%rsi),%rax
4239 255,224, //jmpq *%rax
4240 65,137,200, //mov %ecx,%r8d
4241 65,128,224,7, //and $0x7,%r8b
4242 196,65,52,87,201, //vxorps %ymm9,%ymm9,%ymm9
4243 65,254,200, //dec %r8b
4244 69,15,182,192, //movzbl %r8b,%r8d
4245 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004246 15,135,76,255,255,255, //ja d64 <_sk_load_8888_avx+0x14>
4247 76,141,13,137,0,0,0, //lea 0x89(%rip),%r9 # ea8 <_sk_load_8888_avx+0x158>
Mike Klein894d5612017-03-07 07:59:52 -05004248 75,99,4,129, //movslq (%r9,%r8,4),%rax
4249 76,1,200, //add %r9,%rax
4250 255,224, //jmpq *%rax
4251 196,193,121,110,68,186,24, //vmovd 0x18(%r10,%rdi,4),%xmm0
4252 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
4253 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
4254 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
4255 196,99,117,12,200,64, //vblendps $0x40,%ymm0,%ymm1,%ymm9
4256 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
4257 196,195,121,34,68,186,20,1, //vpinsrd $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
4258 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
4259 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
4260 196,195,121,34,68,186,16,0, //vpinsrd $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
4261 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
4262 196,195,49,34,68,186,12,3, //vpinsrd $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
4263 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
4264 196,195,49,34,68,186,8,2, //vpinsrd $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
4265 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
4266 196,195,49,34,68,186,4,1, //vpinsrd $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
4267 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
4268 196,195,49,34,4,186,0, //vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
4269 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004270 233,188,254,255,255, //jmpq d64 <_sk_load_8888_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05004271 238, //out %al,(%dx)
4272 255, //(bad)
4273 255, //(bad)
4274 255,224, //jmpq *%rax
4275 255, //(bad)
4276 255, //(bad)
4277 255,210, //callq *%rdx
4278 255, //(bad)
4279 255, //(bad)
4280 255,196, //inc %esp
4281 255, //(bad)
4282 255, //(bad)
4283 255,176,255,255,255,156, //pushq -0x63000001(%rax)
4284 255, //(bad)
4285 255, //(bad)
4286 255, //.byte 0xff
4287 128,255,255, //cmp $0xff,%bh
4288 255, //.byte 0xff
4289};
4290
4291CODE const uint8_t sk_store_8888_avx[] = {
4292 72,173, //lods %ds:(%rsi),%rax
4293 76,139,8, //mov (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004294 184,0,0,127,67, //mov $0x437f0000,%eax
4295 197,121,110,192, //vmovd %eax,%xmm8
4296 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
4297 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05004298 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
4299 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
4300 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
4301 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
4302 196,193,33,114,242,8, //vpslld $0x8,%xmm10,%xmm11
4303 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
4304 196,193,41,114,242,8, //vpslld $0x8,%xmm10,%xmm10
4305 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
4306 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
4307 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
4308 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
4309 196,193,33,114,242,16, //vpslld $0x10,%xmm10,%xmm11
4310 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
4311 196,193,41,114,242,16, //vpslld $0x10,%xmm10,%xmm10
4312 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
4313 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
4314 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
4315 196,193,33,114,240,24, //vpslld $0x18,%xmm8,%xmm11
4316 196,67,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm8
4317 196,193,57,114,240,24, //vpslld $0x18,%xmm8,%xmm8
4318 196,67,37,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm11,%ymm8
4319 196,65,45,86,192, //vorpd %ymm8,%ymm10,%ymm8
4320 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
4321 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004322 117,10, //jne f68 <_sk_store_8888_avx+0xa4>
Mike Klein894d5612017-03-07 07:59:52 -05004323 196,65,124,17,4,185, //vmovups %ymm8,(%r9,%rdi,4)
4324 72,173, //lods %ds:(%rsi),%rax
4325 255,224, //jmpq *%rax
4326 137,200, //mov %ecx,%eax
4327 36,7, //and $0x7,%al
4328 254,200, //dec %al
4329 68,15,182,192, //movzbl %al,%r8d
4330 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004331 119,236, //ja f64 <_sk_store_8888_avx+0xa0>
4332 76,141,21,85,0,0,0, //lea 0x55(%rip),%r10 # fd4 <_sk_store_8888_avx+0x110>
Mike Klein894d5612017-03-07 07:59:52 -05004333 75,99,4,130, //movslq (%r10,%r8,4),%rax
4334 76,1,208, //add %r10,%rax
4335 255,224, //jmpq *%rax
4336 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4337 196,67,121,22,76,185,24,2, //vpextrd $0x2,%xmm9,0x18(%r9,%rdi,4)
4338 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4339 196,67,121,22,76,185,20,1, //vpextrd $0x1,%xmm9,0x14(%r9,%rdi,4)
4340 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
4341 196,65,121,126,76,185,16, //vmovd %xmm9,0x10(%r9,%rdi,4)
4342 196,67,121,22,68,185,12,3, //vpextrd $0x3,%xmm8,0xc(%r9,%rdi,4)
4343 196,67,121,22,68,185,8,2, //vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
4344 196,67,121,22,68,185,4,1, //vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
4345 196,65,121,126,4,185, //vmovd %xmm8,(%r9,%rdi,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004346 235,147, //jmp f64 <_sk_store_8888_avx+0xa0>
4347 15,31,0, //nopl (%rax)
4348 245, //cmc
Mike Klein894d5612017-03-07 07:59:52 -05004349 255, //(bad)
4350 255, //(bad)
4351 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004352 237, //in (%dx),%eax
Mike Klein894d5612017-03-07 07:59:52 -05004353 255, //(bad)
4354 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004355 255,229, //jmpq *%rbp
4356 255, //(bad)
4357 255, //(bad)
4358 255, //(bad)
4359 221,255, //(bad)
4360 255, //(bad)
4361 255,208, //callq *%rax
4362 255, //(bad)
4363 255, //(bad)
4364 255,194, //inc %edx
Mike Klein894d5612017-03-07 07:59:52 -05004365 255, //(bad)
4366 255, //(bad)
4367 255, //.byte 0xff
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004368 180,255, //mov $0xff,%ah
Mike Klein894d5612017-03-07 07:59:52 -05004369 255, //(bad)
4370 255, //.byte 0xff
4371};
4372
4373CODE const uint8_t sk_load_f16_avx[] = {
4374 72,173, //lods %ds:(%rsi),%rax
4375 72,139,0, //mov (%rax),%rax
4376 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004377 15,133,240,0,0,0, //jne 10ee <_sk_load_f16_avx+0xfe>
Mike Klein894d5612017-03-07 07:59:52 -05004378 197,249,16,12,248, //vmovupd (%rax,%rdi,8),%xmm1
4379 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
4380 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
4381 197,121,16,68,248,48, //vmovupd 0x30(%rax,%rdi,8),%xmm8
4382 197,241,97,194, //vpunpcklwd %xmm2,%xmm1,%xmm0
4383 197,241,105,202, //vpunpckhwd %xmm2,%xmm1,%xmm1
4384 196,193,97,97,208, //vpunpcklwd %xmm8,%xmm3,%xmm2
4385 196,193,97,105,216, //vpunpckhwd %xmm8,%xmm3,%xmm3
4386 197,121,97,193, //vpunpcklwd %xmm1,%xmm0,%xmm8
4387 197,249,105,193, //vpunpckhwd %xmm1,%xmm0,%xmm0
4388 197,233,97,203, //vpunpcklwd %xmm3,%xmm2,%xmm1
4389 197,105,105,203, //vpunpckhwd %xmm3,%xmm2,%xmm9
4390 197,249,110,90,100, //vmovd 0x64(%rdx),%xmm3
4391 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
4392 196,193,97,101,208, //vpcmpgtw %xmm8,%xmm3,%xmm2
4393 196,65,105,223,192, //vpandn %xmm8,%xmm2,%xmm8
4394 197,225,101,208, //vpcmpgtw %xmm0,%xmm3,%xmm2
4395 197,233,223,192, //vpandn %xmm0,%xmm2,%xmm0
4396 197,225,101,209, //vpcmpgtw %xmm1,%xmm3,%xmm2
4397 197,233,223,201, //vpandn %xmm1,%xmm2,%xmm1
4398 196,193,97,101,209, //vpcmpgtw %xmm9,%xmm3,%xmm2
4399 196,193,105,223,209, //vpandn %xmm9,%xmm2,%xmm2
4400 196,66,121,51,208, //vpmovzxwd %xmm8,%xmm10
4401 196,98,121,51,201, //vpmovzxwd %xmm1,%xmm9
4402 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
4403 197,57,105,195, //vpunpckhwd %xmm3,%xmm8,%xmm8
4404 197,241,105,203, //vpunpckhwd %xmm3,%xmm1,%xmm1
4405 196,98,121,51,216, //vpmovzxwd %xmm0,%xmm11
4406 196,98,121,51,226, //vpmovzxwd %xmm2,%xmm12
4407 197,121,105,235, //vpunpckhwd %xmm3,%xmm0,%xmm13
4408 197,105,105,243, //vpunpckhwd %xmm3,%xmm2,%xmm14
4409 196,193,121,114,242,13, //vpslld $0xd,%xmm10,%xmm0
4410 196,193,105,114,241,13, //vpslld $0xd,%xmm9,%xmm2
4411 196,227,125,24,194,1, //vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
4412 196,98,125,24,74,92, //vbroadcastss 0x5c(%rdx),%ymm9
4413 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
4414 196,193,105,114,240,13, //vpslld $0xd,%xmm8,%xmm2
4415 197,241,114,241,13, //vpslld $0xd,%xmm1,%xmm1
4416 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
4417 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
4418 196,193,105,114,243,13, //vpslld $0xd,%xmm11,%xmm2
4419 196,193,97,114,244,13, //vpslld $0xd,%xmm12,%xmm3
4420 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
4421 197,180,89,210, //vmulps %ymm2,%ymm9,%ymm2
4422 196,193,57,114,245,13, //vpslld $0xd,%xmm13,%xmm8
4423 196,193,97,114,246,13, //vpslld $0xd,%xmm14,%xmm3
4424 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
4425 197,180,89,219, //vmulps %ymm3,%ymm9,%ymm3
4426 72,173, //lods %ds:(%rsi),%rax
4427 255,224, //jmpq *%rax
4428 197,251,16,12,248, //vmovsd (%rax,%rdi,8),%xmm1
4429 196,65,57,87,192, //vxorpd %xmm8,%xmm8,%xmm8
4430 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004431 117,6, //jne 1104 <_sk_load_f16_avx+0x114>
Mike Klein894d5612017-03-07 07:59:52 -05004432 197,250,126,201, //vmovq %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004433 235,30, //jmp 1122 <_sk_load_f16_avx+0x132>
Mike Klein894d5612017-03-07 07:59:52 -05004434 197,241,22,76,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm1,%xmm1
4435 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004436 114,18, //jb 1122 <_sk_load_f16_avx+0x132>
Mike Klein894d5612017-03-07 07:59:52 -05004437 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
4438 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004439 117,19, //jne 112f <_sk_load_f16_avx+0x13f>
Mike Klein894d5612017-03-07 07:59:52 -05004440 197,250,126,210, //vmovq %xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004441 235,46, //jmp 1150 <_sk_load_f16_avx+0x160>
Mike Klein894d5612017-03-07 07:59:52 -05004442 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
4443 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004444 233,230,254,255,255, //jmpq 1015 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004445 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
4446 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004447 114,21, //jb 1150 <_sk_load_f16_avx+0x160>
Mike Klein894d5612017-03-07 07:59:52 -05004448 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
4449 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004450 117,18, //jne 1159 <_sk_load_f16_avx+0x169>
Mike Klein894d5612017-03-07 07:59:52 -05004451 197,250,126,219, //vmovq %xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004452 233,197,254,255,255, //jmpq 1015 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004453 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004454 233,188,254,255,255, //jmpq 1015 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004455 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
4456 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004457 15,130,172,254,255,255, //jb 1015 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004458 197,123,16,68,248,48, //vmovsd 0x30(%rax,%rdi,8),%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004459 233,161,254,255,255, //jmpq 1015 <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05004460};
4461
4462CODE const uint8_t sk_store_f16_avx[] = {
4463 72,173, //lods %ds:(%rsi),%rax
4464 72,139,0, //mov (%rax),%rax
4465 196,98,125,24,66,96, //vbroadcastss 0x60(%rdx),%ymm8
4466 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
4467 196,67,125,25,202,1, //vextractf128 $0x1,%ymm9,%xmm10
4468 196,193,41,114,210,13, //vpsrld $0xd,%xmm10,%xmm10
4469 196,193,49,114,209,13, //vpsrld $0xd,%xmm9,%xmm9
4470 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
4471 196,67,125,25,220,1, //vextractf128 $0x1,%ymm11,%xmm12
4472 196,193,25,114,212,13, //vpsrld $0xd,%xmm12,%xmm12
4473 196,193,33,114,211,13, //vpsrld $0xd,%xmm11,%xmm11
4474 197,60,89,234, //vmulps %ymm2,%ymm8,%ymm13
4475 196,67,125,25,238,1, //vextractf128 $0x1,%ymm13,%xmm14
4476 196,193,9,114,214,13, //vpsrld $0xd,%xmm14,%xmm14
4477 196,193,17,114,213,13, //vpsrld $0xd,%xmm13,%xmm13
4478 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
4479 196,67,125,25,199,1, //vextractf128 $0x1,%ymm8,%xmm15
4480 196,193,1,114,215,13, //vpsrld $0xd,%xmm15,%xmm15
4481 196,193,57,114,208,13, //vpsrld $0xd,%xmm8,%xmm8
4482 196,193,33,115,251,2, //vpslldq $0x2,%xmm11,%xmm11
4483 196,65,33,235,201, //vpor %xmm9,%xmm11,%xmm9
4484 196,193,33,115,252,2, //vpslldq $0x2,%xmm12,%xmm11
4485 196,65,33,235,226, //vpor %xmm10,%xmm11,%xmm12
4486 196,193,57,115,248,2, //vpslldq $0x2,%xmm8,%xmm8
4487 196,65,57,235,197, //vpor %xmm13,%xmm8,%xmm8
4488 196,193,41,115,255,2, //vpslldq $0x2,%xmm15,%xmm10
4489 196,65,41,235,238, //vpor %xmm14,%xmm10,%xmm13
4490 196,65,49,98,216, //vpunpckldq %xmm8,%xmm9,%xmm11
4491 196,65,49,106,208, //vpunpckhdq %xmm8,%xmm9,%xmm10
4492 196,65,25,98,205, //vpunpckldq %xmm13,%xmm12,%xmm9
4493 196,65,25,106,197, //vpunpckhdq %xmm13,%xmm12,%xmm8
4494 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004495 117,27, //jne 1237 <_sk_store_f16_avx+0xc3>
Mike Klein894d5612017-03-07 07:59:52 -05004496 197,120,17,28,248, //vmovups %xmm11,(%rax,%rdi,8)
4497 197,120,17,84,248,16, //vmovups %xmm10,0x10(%rax,%rdi,8)
4498 197,120,17,76,248,32, //vmovups %xmm9,0x20(%rax,%rdi,8)
4499 197,122,127,68,248,48, //vmovdqu %xmm8,0x30(%rax,%rdi,8)
4500 72,173, //lods %ds:(%rsi),%rax
4501 255,224, //jmpq *%rax
4502 197,121,214,28,248, //vmovq %xmm11,(%rax,%rdi,8)
4503 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004504 116,241, //je 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004505 197,121,23,92,248,8, //vmovhpd %xmm11,0x8(%rax,%rdi,8)
4506 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004507 114,229, //jb 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004508 197,121,214,84,248,16, //vmovq %xmm10,0x10(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004509 116,221, //je 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004510 197,121,23,84,248,24, //vmovhpd %xmm10,0x18(%rax,%rdi,8)
4511 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004512 114,209, //jb 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004513 197,121,214,76,248,32, //vmovq %xmm9,0x20(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004514 116,201, //je 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004515 197,121,23,76,248,40, //vmovhpd %xmm9,0x28(%rax,%rdi,8)
4516 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004517 114,189, //jb 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004518 197,121,214,68,248,48, //vmovq %xmm8,0x30(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004519 235,181, //jmp 1233 <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05004520};
4521
4522CODE const uint8_t sk_store_f32_avx[] = {
4523 72,173, //lods %ds:(%rsi),%rax
4524 76,139,0, //mov (%rax),%r8
4525 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
4526 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
4527 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
4528 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
4529 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
4530 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
4531 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
4532 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
4533 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
4534 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004535 117,55, //jne 12eb <_sk_store_f32_avx+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05004536 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
4537 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
4538 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
4539 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
4540 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
4541 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
4542 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
4543 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
4544 72,173, //lods %ds:(%rsi),%rax
4545 255,224, //jmpq *%rax
4546 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
4547 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004548 116,240, //je 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004549 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
4550 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004551 114,227, //jb 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004552 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004553 116,218, //je 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004554 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
4555 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004556 114,205, //jb 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004557 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004558 116,195, //je 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004559 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
4560 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004561 114,181, //jb 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004562 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004563 235,171, //jmp 12e7 <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05004564};
4565
4566CODE const uint8_t sk_clamp_x_avx[] = {
4567 72,173, //lods %ds:(%rsi),%rax
4568 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4569 197,60,95,200, //vmaxps %ymm0,%ymm8,%ymm9
4570 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4571 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
4572 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4573 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
4574 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4575 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
4576 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
4577 72,173, //lods %ds:(%rsi),%rax
4578 255,224, //jmpq *%rax
4579};
4580
4581CODE const uint8_t sk_clamp_y_avx[] = {
4582 72,173, //lods %ds:(%rsi),%rax
4583 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4584 197,60,95,201, //vmaxps %ymm1,%ymm8,%ymm9
4585 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4586 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
4587 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4588 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
4589 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4590 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
4591 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
4592 72,173, //lods %ds:(%rsi),%rax
4593 255,224, //jmpq *%rax
4594};
4595
4596CODE const uint8_t sk_repeat_x_avx[] = {
4597 72,173, //lods %ds:(%rsi),%rax
4598 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4599 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
4600 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
4601 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
4602 196,65,124,92,201, //vsubps %ymm9,%ymm0,%ymm9
4603 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
4604 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4605 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
4606 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4607 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
4608 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
4609 72,173, //lods %ds:(%rsi),%rax
4610 255,224, //jmpq *%rax
4611};
4612
4613CODE const uint8_t sk_repeat_y_avx[] = {
4614 72,173, //lods %ds:(%rsi),%rax
4615 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4616 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
4617 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
4618 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
4619 196,65,116,92,201, //vsubps %ymm9,%ymm1,%ymm9
4620 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
4621 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4622 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
4623 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
4624 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
4625 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
4626 72,173, //lods %ds:(%rsi),%rax
4627 255,224, //jmpq *%rax
4628};
4629
4630CODE const uint8_t sk_mirror_x_avx[] = {
4631 72,173, //lods %ds:(%rsi),%rax
4632 197,122,16,0, //vmovss (%rax),%xmm8
4633 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
4634 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
4635 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
4636 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
4637 196,227,121,4,192,0, //vpermilps $0x0,%xmm0,%xmm0
4638 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
4639 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
4640 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
4641 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
4642 197,172,92,192, //vsubps %ymm0,%ymm10,%ymm0
4643 196,193,124,92,193, //vsubps %ymm9,%ymm0,%ymm0
4644 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4645 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
4646 197,60,84,192, //vandps %ymm0,%ymm8,%ymm8
4647 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
4648 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4649 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
4650 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
4651 196,227,53,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
4652 197,188,93,192, //vminps %ymm0,%ymm8,%ymm0
4653 72,173, //lods %ds:(%rsi),%rax
4654 255,224, //jmpq *%rax
4655};
4656
4657CODE const uint8_t sk_mirror_y_avx[] = {
4658 72,173, //lods %ds:(%rsi),%rax
4659 197,122,16,0, //vmovss (%rax),%xmm8
4660 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
4661 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
4662 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
4663 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
4664 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
4665 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
4666 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
4667 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
4668 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
4669 197,172,92,201, //vsubps %ymm1,%ymm10,%ymm1
4670 196,193,116,92,201, //vsubps %ymm9,%ymm1,%ymm1
4671 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
4672 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
4673 197,60,84,193, //vandps %ymm1,%ymm8,%ymm8
4674 196,99,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm1
4675 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
4676 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
4677 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
4678 196,227,53,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm9,%ymm1
4679 197,188,93,201, //vminps %ymm1,%ymm8,%ymm1
4680 72,173, //lods %ds:(%rsi),%rax
4681 255,224, //jmpq *%rax
4682};
4683
Mike Kleine9ed07d2017-03-07 12:28:11 -05004684CODE const uint8_t sk_luminance_to_alpha_avx[] = {
4685 196,226,125,24,154,136,0,0,0, //vbroadcastss 0x88(%rdx),%ymm3
4686 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
4687 196,226,125,24,154,140,0,0,0, //vbroadcastss 0x8c(%rdx),%ymm3
4688 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
4689 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
4690 196,226,125,24,138,144,0,0,0, //vbroadcastss 0x90(%rdx),%ymm1
4691 197,244,89,202, //vmulps %ymm2,%ymm1,%ymm1
4692 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
4693 72,173, //lods %ds:(%rsi),%rax
4694 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
4695 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
4696 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
4697 255,224, //jmpq *%rax
4698};
4699
Mike Klein894d5612017-03-07 07:59:52 -05004700CODE const uint8_t sk_matrix_2x3_avx[] = {
4701 72,173, //lods %ds:(%rsi),%rax
4702 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4703 196,98,125,24,72,8, //vbroadcastss 0x8(%rax),%ymm9
4704 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
4705 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4706 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4707 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4708 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4709 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4710 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
4711 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
4712 197,172,89,201, //vmulps %ymm1,%ymm10,%ymm1
4713 196,193,116,88,203, //vaddps %ymm11,%ymm1,%ymm1
4714 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
4715 197,252,88,201, //vaddps %ymm1,%ymm0,%ymm1
4716 72,173, //lods %ds:(%rsi),%rax
4717 197,124,41,192, //vmovaps %ymm8,%ymm0
4718 255,224, //jmpq *%rax
4719};
4720
4721CODE const uint8_t sk_matrix_3x4_avx[] = {
4722 72,173, //lods %ds:(%rsi),%rax
4723 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4724 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
4725 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
4726 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
4727 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
4728 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4729 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4730 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4731 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4732 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4733 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4734 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
4735 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
4736 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
4737 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
4738 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
4739 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
4740 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4741 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
4742 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4743 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
4744 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
4745 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
4746 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
4747 197,156,89,210, //vmulps %ymm2,%ymm12,%ymm2
4748 196,193,108,88,213, //vaddps %ymm13,%ymm2,%ymm2
4749 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
4750 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
4751 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
4752 197,252,88,209, //vaddps %ymm1,%ymm0,%ymm2
4753 72,173, //lods %ds:(%rsi),%rax
4754 197,124,41,192, //vmovaps %ymm8,%ymm0
4755 197,124,41,201, //vmovaps %ymm9,%ymm1
4756 255,224, //jmpq *%rax
4757};
4758
Mike Kleine9ed07d2017-03-07 12:28:11 -05004759CODE const uint8_t sk_matrix_4x5_avx[] = {
4760 72,173, //lods %ds:(%rsi),%rax
4761 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4762 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
4763 196,98,125,24,80,32, //vbroadcastss 0x20(%rax),%ymm10
4764 196,98,125,24,88,48, //vbroadcastss 0x30(%rax),%ymm11
4765 196,98,125,24,96,64, //vbroadcastss 0x40(%rax),%ymm12
4766 197,36,89,219, //vmulps %ymm3,%ymm11,%ymm11
4767 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
4768 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
4769 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4770 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4771 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4772 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4773 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4774 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4775 196,98,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm10
4776 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
4777 196,98,125,24,96,52, //vbroadcastss 0x34(%rax),%ymm12
4778 196,98,125,24,104,68, //vbroadcastss 0x44(%rax),%ymm13
4779 197,28,89,227, //vmulps %ymm3,%ymm12,%ymm12
4780 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
4781 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
4782 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
4783 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
4784 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4785 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
4786 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4787 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
4788 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
4789 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
4790 196,98,125,24,104,56, //vbroadcastss 0x38(%rax),%ymm13
4791 196,98,125,24,112,72, //vbroadcastss 0x48(%rax),%ymm14
4792 197,20,89,235, //vmulps %ymm3,%ymm13,%ymm13
4793 196,65,20,88,238, //vaddps %ymm14,%ymm13,%ymm13
4794 197,28,89,226, //vmulps %ymm2,%ymm12,%ymm12
4795 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
4796 197,36,89,217, //vmulps %ymm1,%ymm11,%ymm11
4797 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
4798 197,44,89,208, //vmulps %ymm0,%ymm10,%ymm10
4799 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4800 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
4801 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
4802 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
4803 196,98,125,24,112,60, //vbroadcastss 0x3c(%rax),%ymm14
4804 196,98,125,24,120,76, //vbroadcastss 0x4c(%rax),%ymm15
4805 197,140,89,219, //vmulps %ymm3,%ymm14,%ymm3
4806 196,193,100,88,223, //vaddps %ymm15,%ymm3,%ymm3
4807 197,148,89,210, //vmulps %ymm2,%ymm13,%ymm2
4808 197,236,88,211, //vaddps %ymm3,%ymm2,%ymm2
4809 197,156,89,201, //vmulps %ymm1,%ymm12,%ymm1
4810 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
4811 197,164,89,192, //vmulps %ymm0,%ymm11,%ymm0
4812 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
4813 72,173, //lods %ds:(%rsi),%rax
4814 197,124,41,192, //vmovaps %ymm8,%ymm0
4815 197,124,41,201, //vmovaps %ymm9,%ymm1
4816 197,124,41,210, //vmovaps %ymm10,%ymm2
4817 255,224, //jmpq *%rax
4818};
4819
Mike Klein894d5612017-03-07 07:59:52 -05004820CODE const uint8_t sk_matrix_perspective_avx[] = {
4821 72,173, //lods %ds:(%rsi),%rax
4822 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
4823 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
4824 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
4825 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
4826 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4827 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
4828 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
4829 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
4830 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
4831 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
4832 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
4833 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
4834 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
4835 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
4836 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
4837 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
4838 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
4839 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
4840 196,193,116,88,204, //vaddps %ymm12,%ymm1,%ymm1
4841 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
4842 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
4843 197,252,83,200, //vrcpps %ymm0,%ymm1
4844 197,188,89,193, //vmulps %ymm1,%ymm8,%ymm0
4845 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
4846 72,173, //lods %ds:(%rsi),%rax
4847 255,224, //jmpq *%rax
4848};
4849
4850CODE const uint8_t sk_linear_gradient_2stops_avx[] = {
4851 72,173, //lods %ds:(%rsi),%rax
4852 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
4853 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
4854 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
4855 197,108,88,193, //vaddps %ymm1,%ymm2,%ymm8
4856 196,226,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm1
4857 196,226,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm2
4858 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
4859 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
4860 196,226,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm2
4861 196,226,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm3
4862 197,236,89,208, //vmulps %ymm0,%ymm2,%ymm2
4863 197,228,88,210, //vaddps %ymm2,%ymm3,%ymm2
4864 196,226,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm3
4865 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
4866 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
4867 197,180,88,216, //vaddps %ymm0,%ymm9,%ymm3
4868 72,173, //lods %ds:(%rsi),%rax
4869 197,124,41,192, //vmovaps %ymm8,%ymm0
4870 255,224, //jmpq *%rax
4871};
4872
4873CODE const uint8_t sk_start_pipeline_sse41[] = {
4874 65,87, //push %r15
4875 65,86, //push %r14
4876 65,85, //push %r13
4877 65,84, //push %r12
4878 83, //push %rbx
4879 73,137,207, //mov %rcx,%r15
4880 73,137,214, //mov %rdx,%r14
4881 72,137,251, //mov %rdi,%rbx
4882 72,173, //lods %ds:(%rsi),%rax
4883 73,137,196, //mov %rax,%r12
4884 73,137,245, //mov %rsi,%r13
4885 72,141,67,4, //lea 0x4(%rbx),%rax
4886 76,57,248, //cmp %r15,%rax
4887 118,5, //jbe 28 <_sk_start_pipeline_sse41+0x28>
4888 72,137,216, //mov %rbx,%rax
4889 235,52, //jmp 5c <_sk_start_pipeline_sse41+0x5c>
4890 15,87,192, //xorps %xmm0,%xmm0
4891 15,87,201, //xorps %xmm1,%xmm1
4892 15,87,210, //xorps %xmm2,%xmm2
4893 15,87,219, //xorps %xmm3,%xmm3
4894 15,87,228, //xorps %xmm4,%xmm4
4895 15,87,237, //xorps %xmm5,%xmm5
4896 15,87,246, //xorps %xmm6,%xmm6
4897 15,87,255, //xorps %xmm7,%xmm7
4898 72,137,223, //mov %rbx,%rdi
4899 76,137,238, //mov %r13,%rsi
4900 76,137,242, //mov %r14,%rdx
4901 65,255,212, //callq *%r12
4902 72,141,67,4, //lea 0x4(%rbx),%rax
4903 72,131,195,8, //add $0x8,%rbx
4904 76,57,251, //cmp %r15,%rbx
4905 72,137,195, //mov %rax,%rbx
4906 118,204, //jbe 28 <_sk_start_pipeline_sse41+0x28>
4907 91, //pop %rbx
4908 65,92, //pop %r12
4909 65,93, //pop %r13
4910 65,94, //pop %r14
4911 65,95, //pop %r15
4912 195, //retq
4913};
4914
4915CODE const uint8_t sk_just_return_sse41[] = {
4916 195, //retq
4917};
4918
4919CODE const uint8_t sk_seed_shader_sse41[] = {
4920 72,173, //lods %ds:(%rsi),%rax
4921 102,15,110,199, //movd %edi,%xmm0
4922 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
4923 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004924 185,0,0,0,63, //mov $0x3f000000,%ecx
4925 102,15,110,209, //movd %ecx,%xmm2
4926 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
4927 15,88,202, //addps %xmm2,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05004928 15,16,66,20, //movups 0x14(%rdx),%xmm0
4929 15,88,193, //addps %xmm1,%xmm0
4930 102,15,110,8, //movd (%rax),%xmm1
4931 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
4932 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004933 15,88,202, //addps %xmm2,%xmm1
4934 184,0,0,128,63, //mov $0x3f800000,%eax
4935 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05004936 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
4937 72,173, //lods %ds:(%rsi),%rax
4938 15,87,219, //xorps %xmm3,%xmm3
4939 15,87,228, //xorps %xmm4,%xmm4
4940 15,87,237, //xorps %xmm5,%xmm5
4941 15,87,246, //xorps %xmm6,%xmm6
4942 15,87,255, //xorps %xmm7,%xmm7
4943 255,224, //jmpq *%rax
4944};
4945
4946CODE const uint8_t sk_constant_color_sse41[] = {
4947 72,173, //lods %ds:(%rsi),%rax
4948 15,16,24, //movups (%rax),%xmm3
4949 15,40,195, //movaps %xmm3,%xmm0
4950 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
4951 15,40,203, //movaps %xmm3,%xmm1
4952 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
4953 15,40,211, //movaps %xmm3,%xmm2
4954 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
4955 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
4956 72,173, //lods %ds:(%rsi),%rax
4957 255,224, //jmpq *%rax
4958};
4959
4960CODE const uint8_t sk_clear_sse41[] = {
4961 72,173, //lods %ds:(%rsi),%rax
4962 15,87,192, //xorps %xmm0,%xmm0
4963 15,87,201, //xorps %xmm1,%xmm1
4964 15,87,210, //xorps %xmm2,%xmm2
4965 15,87,219, //xorps %xmm3,%xmm3
4966 255,224, //jmpq *%rax
4967};
4968
4969CODE const uint8_t sk_plus__sse41[] = {
4970 15,88,196, //addps %xmm4,%xmm0
4971 15,88,205, //addps %xmm5,%xmm1
4972 15,88,214, //addps %xmm6,%xmm2
4973 15,88,223, //addps %xmm7,%xmm3
4974 72,173, //lods %ds:(%rsi),%rax
4975 255,224, //jmpq *%rax
4976};
4977
4978CODE const uint8_t sk_srcover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004979 184,0,0,128,63, //mov $0x3f800000,%eax
4980 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05004981 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
4982 68,15,92,195, //subps %xmm3,%xmm8
4983 69,15,40,200, //movaps %xmm8,%xmm9
4984 68,15,89,204, //mulps %xmm4,%xmm9
4985 65,15,88,193, //addps %xmm9,%xmm0
4986 69,15,40,200, //movaps %xmm8,%xmm9
4987 68,15,89,205, //mulps %xmm5,%xmm9
4988 65,15,88,201, //addps %xmm9,%xmm1
4989 69,15,40,200, //movaps %xmm8,%xmm9
4990 68,15,89,206, //mulps %xmm6,%xmm9
4991 65,15,88,209, //addps %xmm9,%xmm2
4992 68,15,89,199, //mulps %xmm7,%xmm8
4993 65,15,88,216, //addps %xmm8,%xmm3
4994 72,173, //lods %ds:(%rsi),%rax
4995 255,224, //jmpq *%rax
4996};
4997
4998CODE const uint8_t sk_dstover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05004999 184,0,0,128,63, //mov $0x3f800000,%eax
5000 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005001 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5002 68,15,92,199, //subps %xmm7,%xmm8
5003 65,15,89,192, //mulps %xmm8,%xmm0
5004 15,88,196, //addps %xmm4,%xmm0
5005 65,15,89,200, //mulps %xmm8,%xmm1
5006 15,88,205, //addps %xmm5,%xmm1
5007 65,15,89,208, //mulps %xmm8,%xmm2
5008 15,88,214, //addps %xmm6,%xmm2
5009 65,15,89,216, //mulps %xmm8,%xmm3
5010 15,88,223, //addps %xmm7,%xmm3
5011 72,173, //lods %ds:(%rsi),%rax
5012 255,224, //jmpq *%rax
5013};
5014
5015CODE const uint8_t sk_clamp_0_sse41[] = {
5016 69,15,87,192, //xorps %xmm8,%xmm8
5017 65,15,95,192, //maxps %xmm8,%xmm0
5018 65,15,95,200, //maxps %xmm8,%xmm1
5019 65,15,95,208, //maxps %xmm8,%xmm2
5020 65,15,95,216, //maxps %xmm8,%xmm3
5021 72,173, //lods %ds:(%rsi),%rax
5022 255,224, //jmpq *%rax
5023};
5024
5025CODE const uint8_t sk_clamp_1_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005026 184,0,0,128,63, //mov $0x3f800000,%eax
5027 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005028 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5029 65,15,93,192, //minps %xmm8,%xmm0
5030 65,15,93,200, //minps %xmm8,%xmm1
5031 65,15,93,208, //minps %xmm8,%xmm2
5032 65,15,93,216, //minps %xmm8,%xmm3
5033 72,173, //lods %ds:(%rsi),%rax
5034 255,224, //jmpq *%rax
5035};
5036
5037CODE const uint8_t sk_clamp_a_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005038 184,0,0,128,63, //mov $0x3f800000,%eax
5039 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005040 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5041 65,15,93,216, //minps %xmm8,%xmm3
5042 15,93,195, //minps %xmm3,%xmm0
5043 15,93,203, //minps %xmm3,%xmm1
5044 15,93,211, //minps %xmm3,%xmm2
5045 72,173, //lods %ds:(%rsi),%rax
5046 255,224, //jmpq *%rax
5047};
5048
5049CODE const uint8_t sk_set_rgb_sse41[] = {
5050 72,173, //lods %ds:(%rsi),%rax
5051 243,15,16,0, //movss (%rax),%xmm0
5052 243,15,16,72,4, //movss 0x4(%rax),%xmm1
5053 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5054 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5055 243,15,16,80,8, //movss 0x8(%rax),%xmm2
5056 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5057 72,173, //lods %ds:(%rsi),%rax
5058 255,224, //jmpq *%rax
5059};
5060
5061CODE const uint8_t sk_swap_rb_sse41[] = {
5062 68,15,40,192, //movaps %xmm0,%xmm8
5063 72,173, //lods %ds:(%rsi),%rax
5064 15,40,194, //movaps %xmm2,%xmm0
5065 65,15,40,208, //movaps %xmm8,%xmm2
5066 255,224, //jmpq *%rax
5067};
5068
5069CODE const uint8_t sk_swap_sse41[] = {
5070 68,15,40,195, //movaps %xmm3,%xmm8
5071 68,15,40,202, //movaps %xmm2,%xmm9
5072 68,15,40,209, //movaps %xmm1,%xmm10
5073 68,15,40,216, //movaps %xmm0,%xmm11
5074 72,173, //lods %ds:(%rsi),%rax
5075 15,40,196, //movaps %xmm4,%xmm0
5076 15,40,205, //movaps %xmm5,%xmm1
5077 15,40,214, //movaps %xmm6,%xmm2
5078 15,40,223, //movaps %xmm7,%xmm3
5079 65,15,40,227, //movaps %xmm11,%xmm4
5080 65,15,40,234, //movaps %xmm10,%xmm5
5081 65,15,40,241, //movaps %xmm9,%xmm6
5082 65,15,40,248, //movaps %xmm8,%xmm7
5083 255,224, //jmpq *%rax
5084};
5085
5086CODE const uint8_t sk_move_src_dst_sse41[] = {
5087 72,173, //lods %ds:(%rsi),%rax
5088 15,40,224, //movaps %xmm0,%xmm4
5089 15,40,233, //movaps %xmm1,%xmm5
5090 15,40,242, //movaps %xmm2,%xmm6
5091 15,40,251, //movaps %xmm3,%xmm7
5092 255,224, //jmpq *%rax
5093};
5094
5095CODE const uint8_t sk_move_dst_src_sse41[] = {
5096 72,173, //lods %ds:(%rsi),%rax
5097 15,40,196, //movaps %xmm4,%xmm0
5098 15,40,205, //movaps %xmm5,%xmm1
5099 15,40,214, //movaps %xmm6,%xmm2
5100 15,40,223, //movaps %xmm7,%xmm3
5101 255,224, //jmpq *%rax
5102};
5103
5104CODE const uint8_t sk_premul_sse41[] = {
5105 15,89,195, //mulps %xmm3,%xmm0
5106 15,89,203, //mulps %xmm3,%xmm1
5107 15,89,211, //mulps %xmm3,%xmm2
5108 72,173, //lods %ds:(%rsi),%rax
5109 255,224, //jmpq *%rax
5110};
5111
5112CODE const uint8_t sk_unpremul_sse41[] = {
5113 68,15,40,192, //movaps %xmm0,%xmm8
5114 69,15,87,201, //xorps %xmm9,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005115 184,0,0,128,63, //mov $0x3f800000,%eax
5116 102,68,15,110,208, //movd %eax,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005117 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5118 68,15,94,211, //divps %xmm3,%xmm10
5119 15,40,195, //movaps %xmm3,%xmm0
5120 65,15,194,193,0, //cmpeqps %xmm9,%xmm0
5121 102,69,15,56,20,209, //blendvps %xmm0,%xmm9,%xmm10
5122 69,15,89,194, //mulps %xmm10,%xmm8
5123 65,15,89,202, //mulps %xmm10,%xmm1
5124 65,15,89,210, //mulps %xmm10,%xmm2
5125 72,173, //lods %ds:(%rsi),%rax
5126 65,15,40,192, //movaps %xmm8,%xmm0
5127 255,224, //jmpq *%rax
5128};
5129
5130CODE const uint8_t sk_from_srgb_sse41[] = {
5131 68,15,40,194, //movaps %xmm2,%xmm8
5132 243,68,15,16,90,64, //movss 0x40(%rdx),%xmm11
5133 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5134 69,15,40,211, //movaps %xmm11,%xmm10
5135 68,15,89,208, //mulps %xmm0,%xmm10
5136 68,15,40,240, //movaps %xmm0,%xmm14
5137 69,15,89,246, //mulps %xmm14,%xmm14
5138 243,15,16,82,60, //movss 0x3c(%rdx),%xmm2
5139 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5140 243,68,15,16,98,52, //movss 0x34(%rdx),%xmm12
5141 243,68,15,16,106,56, //movss 0x38(%rdx),%xmm13
5142 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5143 68,15,40,202, //movaps %xmm2,%xmm9
5144 68,15,89,200, //mulps %xmm0,%xmm9
5145 69,15,88,205, //addps %xmm13,%xmm9
5146 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5147 69,15,89,206, //mulps %xmm14,%xmm9
5148 69,15,88,204, //addps %xmm12,%xmm9
5149 243,68,15,16,114,68, //movss 0x44(%rdx),%xmm14
5150 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
5151 65,15,194,198,1, //cmpltps %xmm14,%xmm0
5152 102,69,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm9
5153 69,15,40,251, //movaps %xmm11,%xmm15
5154 68,15,89,249, //mulps %xmm1,%xmm15
5155 15,40,193, //movaps %xmm1,%xmm0
5156 15,89,192, //mulps %xmm0,%xmm0
5157 68,15,40,210, //movaps %xmm2,%xmm10
5158 68,15,89,209, //mulps %xmm1,%xmm10
5159 69,15,88,213, //addps %xmm13,%xmm10
5160 68,15,89,208, //mulps %xmm0,%xmm10
5161 69,15,88,212, //addps %xmm12,%xmm10
5162 65,15,194,206,1, //cmpltps %xmm14,%xmm1
5163 15,40,193, //movaps %xmm1,%xmm0
5164 102,69,15,56,20,215, //blendvps %xmm0,%xmm15,%xmm10
5165 69,15,89,216, //mulps %xmm8,%xmm11
5166 65,15,40,192, //movaps %xmm8,%xmm0
5167 15,89,192, //mulps %xmm0,%xmm0
5168 65,15,89,208, //mulps %xmm8,%xmm2
5169 65,15,88,213, //addps %xmm13,%xmm2
5170 15,89,208, //mulps %xmm0,%xmm2
5171 65,15,88,212, //addps %xmm12,%xmm2
5172 69,15,194,198,1, //cmpltps %xmm14,%xmm8
5173 65,15,40,192, //movaps %xmm8,%xmm0
5174 102,65,15,56,20,211, //blendvps %xmm0,%xmm11,%xmm2
5175 72,173, //lods %ds:(%rsi),%rax
5176 65,15,40,193, //movaps %xmm9,%xmm0
5177 65,15,40,202, //movaps %xmm10,%xmm1
5178 255,224, //jmpq *%rax
5179};
5180
5181CODE const uint8_t sk_to_srgb_sse41[] = {
5182 72,131,236,24, //sub $0x18,%rsp
5183 15,41,60,36, //movaps %xmm7,(%rsp)
5184 15,40,254, //movaps %xmm6,%xmm7
5185 15,40,245, //movaps %xmm5,%xmm6
5186 15,40,236, //movaps %xmm4,%xmm5
5187 15,40,227, //movaps %xmm3,%xmm4
5188 68,15,40,194, //movaps %xmm2,%xmm8
5189 15,40,217, //movaps %xmm1,%xmm3
5190 15,82,208, //rsqrtps %xmm0,%xmm2
5191 68,15,83,202, //rcpps %xmm2,%xmm9
5192 68,15,82,210, //rsqrtps %xmm2,%xmm10
5193 243,15,16,18, //movss (%rdx),%xmm2
5194 243,68,15,16,90,72, //movss 0x48(%rdx),%xmm11
5195 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5196 65,15,40,203, //movaps %xmm11,%xmm1
5197 15,89,200, //mulps %xmm0,%xmm1
5198 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5199 243,68,15,16,98,76, //movss 0x4c(%rdx),%xmm12
5200 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5201 243,68,15,16,106,80, //movss 0x50(%rdx),%xmm13
5202 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5203 243,68,15,16,114,84, //movss 0x54(%rdx),%xmm14
5204 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
5205 69,15,89,205, //mulps %xmm13,%xmm9
5206 69,15,88,206, //addps %xmm14,%xmm9
5207 69,15,89,212, //mulps %xmm12,%xmm10
5208 69,15,88,209, //addps %xmm9,%xmm10
5209 68,15,40,202, //movaps %xmm2,%xmm9
5210 69,15,93,202, //minps %xmm10,%xmm9
5211 243,68,15,16,122,88, //movss 0x58(%rdx),%xmm15
5212 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
5213 65,15,194,199,1, //cmpltps %xmm15,%xmm0
5214 102,68,15,56,20,201, //blendvps %xmm0,%xmm1,%xmm9
5215 15,82,195, //rsqrtps %xmm3,%xmm0
5216 15,83,200, //rcpps %xmm0,%xmm1
5217 15,82,192, //rsqrtps %xmm0,%xmm0
5218 65,15,89,205, //mulps %xmm13,%xmm1
5219 65,15,88,206, //addps %xmm14,%xmm1
5220 65,15,89,196, //mulps %xmm12,%xmm0
5221 15,88,193, //addps %xmm1,%xmm0
5222 68,15,40,210, //movaps %xmm2,%xmm10
5223 68,15,93,208, //minps %xmm0,%xmm10
5224 65,15,40,203, //movaps %xmm11,%xmm1
5225 15,89,203, //mulps %xmm3,%xmm1
5226 65,15,194,223,1, //cmpltps %xmm15,%xmm3
5227 15,40,195, //movaps %xmm3,%xmm0
5228 102,68,15,56,20,209, //blendvps %xmm0,%xmm1,%xmm10
5229 65,15,82,192, //rsqrtps %xmm8,%xmm0
5230 15,83,200, //rcpps %xmm0,%xmm1
5231 65,15,89,205, //mulps %xmm13,%xmm1
5232 65,15,88,206, //addps %xmm14,%xmm1
5233 15,82,192, //rsqrtps %xmm0,%xmm0
5234 65,15,89,196, //mulps %xmm12,%xmm0
5235 15,88,193, //addps %xmm1,%xmm0
5236 15,93,208, //minps %xmm0,%xmm2
5237 69,15,89,216, //mulps %xmm8,%xmm11
5238 69,15,194,199,1, //cmpltps %xmm15,%xmm8
5239 65,15,40,192, //movaps %xmm8,%xmm0
5240 102,65,15,56,20,211, //blendvps %xmm0,%xmm11,%xmm2
5241 72,173, //lods %ds:(%rsi),%rax
5242 65,15,40,193, //movaps %xmm9,%xmm0
5243 65,15,40,202, //movaps %xmm10,%xmm1
5244 15,40,220, //movaps %xmm4,%xmm3
5245 15,40,229, //movaps %xmm5,%xmm4
5246 15,40,238, //movaps %xmm6,%xmm5
5247 15,40,247, //movaps %xmm7,%xmm6
5248 15,40,60,36, //movaps (%rsp),%xmm7
5249 72,131,196,24, //add $0x18,%rsp
5250 255,224, //jmpq *%rax
5251};
5252
5253CODE const uint8_t sk_scale_1_float_sse41[] = {
5254 72,173, //lods %ds:(%rsi),%rax
5255 243,68,15,16,0, //movss (%rax),%xmm8
5256 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5257 65,15,89,192, //mulps %xmm8,%xmm0
5258 65,15,89,200, //mulps %xmm8,%xmm1
5259 65,15,89,208, //mulps %xmm8,%xmm2
5260 65,15,89,216, //mulps %xmm8,%xmm3
5261 72,173, //lods %ds:(%rsi),%rax
5262 255,224, //jmpq *%rax
5263};
5264
5265CODE const uint8_t sk_scale_u8_sse41[] = {
5266 72,173, //lods %ds:(%rsi),%rax
5267 72,139,0, //mov (%rax),%rax
5268 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
5269 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005270 184,129,128,128,59, //mov $0x3b808081,%eax
5271 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05005272 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5273 69,15,89,200, //mulps %xmm8,%xmm9
5274 65,15,89,193, //mulps %xmm9,%xmm0
5275 65,15,89,201, //mulps %xmm9,%xmm1
5276 65,15,89,209, //mulps %xmm9,%xmm2
5277 65,15,89,217, //mulps %xmm9,%xmm3
5278 72,173, //lods %ds:(%rsi),%rax
5279 255,224, //jmpq *%rax
5280};
5281
5282CODE const uint8_t sk_lerp_1_float_sse41[] = {
5283 72,173, //lods %ds:(%rsi),%rax
5284 243,68,15,16,0, //movss (%rax),%xmm8
5285 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5286 15,92,196, //subps %xmm4,%xmm0
5287 65,15,89,192, //mulps %xmm8,%xmm0
5288 15,88,196, //addps %xmm4,%xmm0
5289 15,92,205, //subps %xmm5,%xmm1
5290 65,15,89,200, //mulps %xmm8,%xmm1
5291 15,88,205, //addps %xmm5,%xmm1
5292 15,92,214, //subps %xmm6,%xmm2
5293 65,15,89,208, //mulps %xmm8,%xmm2
5294 15,88,214, //addps %xmm6,%xmm2
5295 15,92,223, //subps %xmm7,%xmm3
5296 65,15,89,216, //mulps %xmm8,%xmm3
5297 15,88,223, //addps %xmm7,%xmm3
5298 72,173, //lods %ds:(%rsi),%rax
5299 255,224, //jmpq *%rax
5300};
5301
5302CODE const uint8_t sk_lerp_u8_sse41[] = {
5303 72,173, //lods %ds:(%rsi),%rax
5304 72,139,0, //mov (%rax),%rax
5305 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
5306 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005307 184,129,128,128,59, //mov $0x3b808081,%eax
5308 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05005309 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5310 69,15,89,200, //mulps %xmm8,%xmm9
5311 15,92,196, //subps %xmm4,%xmm0
5312 65,15,89,193, //mulps %xmm9,%xmm0
5313 15,88,196, //addps %xmm4,%xmm0
5314 15,92,205, //subps %xmm5,%xmm1
5315 65,15,89,201, //mulps %xmm9,%xmm1
5316 15,88,205, //addps %xmm5,%xmm1
5317 15,92,214, //subps %xmm6,%xmm2
5318 65,15,89,209, //mulps %xmm9,%xmm2
5319 15,88,214, //addps %xmm6,%xmm2
5320 15,92,223, //subps %xmm7,%xmm3
5321 65,15,89,217, //mulps %xmm9,%xmm3
5322 15,88,223, //addps %xmm7,%xmm3
5323 72,173, //lods %ds:(%rsi),%rax
5324 255,224, //jmpq *%rax
5325};
5326
5327CODE const uint8_t sk_lerp_565_sse41[] = {
5328 72,173, //lods %ds:(%rsi),%rax
5329 72,139,0, //mov (%rax),%rax
5330 102,68,15,56,51,4,120, //pmovzxwd (%rax,%rdi,2),%xmm8
5331 102,15,110,90,104, //movd 0x68(%rdx),%xmm3
5332 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5333 102,65,15,219,216, //pand %xmm8,%xmm3
5334 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005335 243,68,15,16,90,116, //movss 0x74(%rdx),%xmm11
5336 243,68,15,16,82,120, //movss 0x78(%rdx),%xmm10
Mike Klein894d5612017-03-07 07:59:52 -05005337 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5338 69,15,89,217, //mulps %xmm9,%xmm11
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005339 102,15,110,90,108, //movd 0x6c(%rdx),%xmm3
5340 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5341 102,65,15,219,216, //pand %xmm8,%xmm3
5342 15,91,219, //cvtdq2ps %xmm3,%xmm3
5343 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5344 68,15,89,211, //mulps %xmm3,%xmm10
5345 102,15,110,90,112, //movd 0x70(%rdx),%xmm3
5346 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
5347 102,65,15,219,216, //pand %xmm8,%xmm3
5348 68,15,91,195, //cvtdq2ps %xmm3,%xmm8
5349 243,15,16,90,124, //movss 0x7c(%rdx),%xmm3
5350 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5351 65,15,89,216, //mulps %xmm8,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005352 15,92,196, //subps %xmm4,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005353 65,15,89,195, //mulps %xmm11,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005354 15,88,196, //addps %xmm4,%xmm0
5355 15,92,205, //subps %xmm5,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005356 65,15,89,202, //mulps %xmm10,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05005357 15,88,205, //addps %xmm5,%xmm1
5358 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005359 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05005360 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005361 184,0,0,128,63, //mov $0x3f800000,%eax
5362 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05005363 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5364 72,173, //lods %ds:(%rsi),%rax
5365 255,224, //jmpq *%rax
5366};
5367
5368CODE const uint8_t sk_load_tables_sse41[] = {
5369 72,173, //lods %ds:(%rsi),%rax
5370 72,139,8, //mov (%rax),%rcx
5371 76,139,64,8, //mov 0x8(%rax),%r8
5372 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
5373 102,15,110,66,16, //movd 0x10(%rdx),%xmm0
5374 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
5375 102,65,15,111,200, //movdqa %xmm8,%xmm1
5376 102,15,114,209,8, //psrld $0x8,%xmm1
5377 102,15,219,200, //pand %xmm0,%xmm1
5378 102,65,15,111,208, //movdqa %xmm8,%xmm2
5379 102,15,114,210,16, //psrld $0x10,%xmm2
5380 102,15,219,208, //pand %xmm0,%xmm2
5381 102,65,15,219,192, //pand %xmm8,%xmm0
5382 102,72,15,58,22,193,1, //pextrq $0x1,%xmm0,%rcx
5383 65,137,201, //mov %ecx,%r9d
5384 72,193,233,32, //shr $0x20,%rcx
5385 102,73,15,126,194, //movq %xmm0,%r10
5386 69,137,211, //mov %r10d,%r11d
5387 73,193,234,32, //shr $0x20,%r10
5388 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
5389 102,67,15,58,33,4,144,16, //insertps $0x10,(%r8,%r10,4),%xmm0
5390 102,67,15,58,33,4,136,32, //insertps $0x20,(%r8,%r9,4),%xmm0
5391 102,65,15,58,33,4,136,48, //insertps $0x30,(%r8,%rcx,4),%xmm0
5392 72,139,72,16, //mov 0x10(%rax),%rcx
5393 102,73,15,58,22,200,1, //pextrq $0x1,%xmm1,%r8
5394 69,137,193, //mov %r8d,%r9d
5395 73,193,232,32, //shr $0x20,%r8
5396 102,73,15,126,202, //movq %xmm1,%r10
5397 69,137,211, //mov %r10d,%r11d
5398 73,193,234,32, //shr $0x20,%r10
5399 243,66,15,16,12,153, //movss (%rcx,%r11,4),%xmm1
5400 102,66,15,58,33,12,145,16, //insertps $0x10,(%rcx,%r10,4),%xmm1
5401 243,66,15,16,28,137, //movss (%rcx,%r9,4),%xmm3
5402 102,15,58,33,203,32, //insertps $0x20,%xmm3,%xmm1
5403 243,66,15,16,28,129, //movss (%rcx,%r8,4),%xmm3
5404 102,15,58,33,203,48, //insertps $0x30,%xmm3,%xmm1
5405 72,139,64,24, //mov 0x18(%rax),%rax
5406 102,72,15,58,22,209,1, //pextrq $0x1,%xmm2,%rcx
5407 65,137,200, //mov %ecx,%r8d
5408 72,193,233,32, //shr $0x20,%rcx
5409 102,73,15,126,209, //movq %xmm2,%r9
5410 69,137,202, //mov %r9d,%r10d
5411 73,193,233,32, //shr $0x20,%r9
5412 243,66,15,16,20,144, //movss (%rax,%r10,4),%xmm2
5413 102,66,15,58,33,20,136,16, //insertps $0x10,(%rax,%r9,4),%xmm2
5414 243,66,15,16,28,128, //movss (%rax,%r8,4),%xmm3
5415 102,15,58,33,211,32, //insertps $0x20,%xmm3,%xmm2
5416 243,15,16,28,136, //movss (%rax,%rcx,4),%xmm3
5417 102,15,58,33,211,48, //insertps $0x30,%xmm3,%xmm2
5418 102,65,15,114,208,24, //psrld $0x18,%xmm8
5419 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
5420 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
5421 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5422 65,15,89,216, //mulps %xmm8,%xmm3
5423 72,173, //lods %ds:(%rsi),%rax
5424 255,224, //jmpq *%rax
5425};
5426
5427CODE const uint8_t sk_load_a8_sse41[] = {
5428 72,173, //lods %ds:(%rsi),%rax
5429 72,139,0, //mov (%rax),%rax
5430 102,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm0
5431 15,91,192, //cvtdq2ps %xmm0,%xmm0
5432 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
5433 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5434 15,89,216, //mulps %xmm0,%xmm3
5435 72,173, //lods %ds:(%rsi),%rax
5436 15,87,192, //xorps %xmm0,%xmm0
5437 15,87,201, //xorps %xmm1,%xmm1
5438 15,87,210, //xorps %xmm2,%xmm2
5439 255,224, //jmpq *%rax
5440};
5441
5442CODE const uint8_t sk_store_a8_sse41[] = {
5443 72,173, //lods %ds:(%rsi),%rax
5444 72,139,0, //mov (%rax),%rax
5445 243,68,15,16,66,8, //movss 0x8(%rdx),%xmm8
5446 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5447 68,15,89,195, //mulps %xmm3,%xmm8
5448 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
5449 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
5450 102,69,15,103,192, //packuswb %xmm8,%xmm8
5451 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
5452 72,173, //lods %ds:(%rsi),%rax
5453 255,224, //jmpq *%rax
5454};
5455
5456CODE const uint8_t sk_load_565_sse41[] = {
5457 72,173, //lods %ds:(%rsi),%rax
5458 72,139,0, //mov (%rax),%rax
5459 102,68,15,56,51,12,120, //pmovzxwd (%rax,%rdi,2),%xmm9
5460 102,15,110,66,104, //movd 0x68(%rdx),%xmm0
5461 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
5462 102,65,15,219,193, //pand %xmm9,%xmm0
5463 15,91,200, //cvtdq2ps %xmm0,%xmm1
5464 243,15,16,26, //movss (%rdx),%xmm3
5465 243,15,16,66,116, //movss 0x74(%rdx),%xmm0
5466 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5467 15,89,193, //mulps %xmm1,%xmm0
5468 102,15,110,74,108, //movd 0x6c(%rdx),%xmm1
5469 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
5470 102,65,15,219,201, //pand %xmm9,%xmm1
5471 68,15,91,193, //cvtdq2ps %xmm1,%xmm8
5472 243,15,16,74,120, //movss 0x78(%rdx),%xmm1
5473 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5474 65,15,89,200, //mulps %xmm8,%xmm1
5475 102,15,110,82,112, //movd 0x70(%rdx),%xmm2
5476 102,15,112,210,0, //pshufd $0x0,%xmm2,%xmm2
5477 102,65,15,219,209, //pand %xmm9,%xmm2
5478 68,15,91,194, //cvtdq2ps %xmm2,%xmm8
5479 243,15,16,82,124, //movss 0x7c(%rdx),%xmm2
5480 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
5481 65,15,89,208, //mulps %xmm8,%xmm2
5482 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5483 72,173, //lods %ds:(%rsi),%rax
5484 255,224, //jmpq *%rax
5485};
5486
5487CODE const uint8_t sk_store_565_sse41[] = {
5488 72,173, //lods %ds:(%rsi),%rax
5489 72,139,0, //mov (%rax),%rax
5490 243,68,15,16,130,128,0,0,0, //movss 0x80(%rdx),%xmm8
5491 243,68,15,16,138,132,0,0,0, //movss 0x84(%rdx),%xmm9
5492 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5493 69,15,40,208, //movaps %xmm8,%xmm10
5494 68,15,89,208, //mulps %xmm0,%xmm10
5495 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
5496 102,65,15,114,242,11, //pslld $0xb,%xmm10
5497 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5498 68,15,89,201, //mulps %xmm1,%xmm9
5499 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
5500 102,65,15,114,241,5, //pslld $0x5,%xmm9
5501 102,69,15,235,202, //por %xmm10,%xmm9
5502 68,15,89,194, //mulps %xmm2,%xmm8
5503 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
5504 102,69,15,86,193, //orpd %xmm9,%xmm8
5505 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
5506 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
5507 72,173, //lods %ds:(%rsi),%rax
5508 255,224, //jmpq *%rax
5509};
5510
5511CODE const uint8_t sk_load_8888_sse41[] = {
5512 72,173, //lods %ds:(%rsi),%rax
5513 72,139,0, //mov (%rax),%rax
5514 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005515 184,255,0,0,0, //mov $0xff,%eax
5516 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05005517 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
5518 102,15,111,203, //movdqa %xmm3,%xmm1
5519 102,15,114,209,8, //psrld $0x8,%xmm1
5520 102,15,219,200, //pand %xmm0,%xmm1
5521 102,15,111,211, //movdqa %xmm3,%xmm2
5522 102,15,114,210,16, //psrld $0x10,%xmm2
5523 102,15,219,208, //pand %xmm0,%xmm2
5524 102,15,219,195, //pand %xmm3,%xmm0
5525 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005526 184,129,128,128,59, //mov $0x3b808081,%eax
5527 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005528 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5529 65,15,89,192, //mulps %xmm8,%xmm0
5530 15,91,201, //cvtdq2ps %xmm1,%xmm1
5531 65,15,89,200, //mulps %xmm8,%xmm1
5532 15,91,210, //cvtdq2ps %xmm2,%xmm2
5533 65,15,89,208, //mulps %xmm8,%xmm2
5534 102,15,114,211,24, //psrld $0x18,%xmm3
5535 15,91,219, //cvtdq2ps %xmm3,%xmm3
5536 65,15,89,216, //mulps %xmm8,%xmm3
5537 72,173, //lods %ds:(%rsi),%rax
5538 255,224, //jmpq *%rax
5539};
5540
5541CODE const uint8_t sk_store_8888_sse41[] = {
5542 72,173, //lods %ds:(%rsi),%rax
5543 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05005544 185,0,0,127,67, //mov $0x437f0000,%ecx
5545 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05005546 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5547 69,15,40,200, //movaps %xmm8,%xmm9
5548 68,15,89,200, //mulps %xmm0,%xmm9
5549 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
5550 69,15,40,208, //movaps %xmm8,%xmm10
5551 68,15,89,209, //mulps %xmm1,%xmm10
5552 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
5553 102,65,15,114,242,8, //pslld $0x8,%xmm10
5554 102,69,15,235,209, //por %xmm9,%xmm10
5555 69,15,40,200, //movaps %xmm8,%xmm9
5556 68,15,89,202, //mulps %xmm2,%xmm9
5557 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
5558 102,65,15,114,241,16, //pslld $0x10,%xmm9
5559 68,15,89,195, //mulps %xmm3,%xmm8
5560 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
5561 102,65,15,114,240,24, //pslld $0x18,%xmm8
5562 102,69,15,235,193, //por %xmm9,%xmm8
5563 102,69,15,235,194, //por %xmm10,%xmm8
5564 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
5565 72,173, //lods %ds:(%rsi),%rax
5566 255,224, //jmpq *%rax
5567};
5568
5569CODE const uint8_t sk_load_f16_sse41[] = {
5570 72,173, //lods %ds:(%rsi),%rax
5571 72,139,0, //mov (%rax),%rax
5572 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
5573 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
5574 102,15,111,208, //movdqa %xmm0,%xmm2
5575 102,15,97,209, //punpcklwd %xmm1,%xmm2
5576 102,15,105,193, //punpckhwd %xmm1,%xmm0
5577 102,68,15,111,194, //movdqa %xmm2,%xmm8
5578 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
5579 102,15,105,208, //punpckhwd %xmm0,%xmm2
5580 102,15,110,66,100, //movd 0x64(%rdx),%xmm0
5581 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
5582 102,15,111,203, //movdqa %xmm3,%xmm1
5583 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
5584 102,65,15,223,200, //pandn %xmm8,%xmm1
5585 102,15,101,218, //pcmpgtw %xmm2,%xmm3
5586 102,15,223,218, //pandn %xmm2,%xmm3
5587 102,15,56,51,193, //pmovzxwd %xmm1,%xmm0
5588 102,15,114,240,13, //pslld $0xd,%xmm0
5589 102,15,110,82,92, //movd 0x5c(%rdx),%xmm2
5590 102,68,15,112,194,0, //pshufd $0x0,%xmm2,%xmm8
5591 65,15,89,192, //mulps %xmm8,%xmm0
5592 102,69,15,239,201, //pxor %xmm9,%xmm9
5593 102,65,15,105,201, //punpckhwd %xmm9,%xmm1
5594 102,15,114,241,13, //pslld $0xd,%xmm1
5595 65,15,89,200, //mulps %xmm8,%xmm1
5596 102,15,56,51,211, //pmovzxwd %xmm3,%xmm2
5597 102,15,114,242,13, //pslld $0xd,%xmm2
5598 65,15,89,208, //mulps %xmm8,%xmm2
5599 102,65,15,105,217, //punpckhwd %xmm9,%xmm3
5600 102,15,114,243,13, //pslld $0xd,%xmm3
5601 65,15,89,216, //mulps %xmm8,%xmm3
5602 72,173, //lods %ds:(%rsi),%rax
5603 255,224, //jmpq *%rax
5604};
5605
5606CODE const uint8_t sk_store_f16_sse41[] = {
5607 72,173, //lods %ds:(%rsi),%rax
5608 72,139,0, //mov (%rax),%rax
5609 102,68,15,110,66,96, //movd 0x60(%rdx),%xmm8
5610 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
5611 102,69,15,111,200, //movdqa %xmm8,%xmm9
5612 68,15,89,200, //mulps %xmm0,%xmm9
5613 102,65,15,114,209,13, //psrld $0xd,%xmm9
5614 102,69,15,111,208, //movdqa %xmm8,%xmm10
5615 68,15,89,209, //mulps %xmm1,%xmm10
5616 102,65,15,114,210,13, //psrld $0xd,%xmm10
5617 102,69,15,111,216, //movdqa %xmm8,%xmm11
5618 68,15,89,218, //mulps %xmm2,%xmm11
5619 102,65,15,114,211,13, //psrld $0xd,%xmm11
5620 68,15,89,195, //mulps %xmm3,%xmm8
5621 102,65,15,114,208,13, //psrld $0xd,%xmm8
5622 102,65,15,115,250,2, //pslldq $0x2,%xmm10
5623 102,69,15,235,209, //por %xmm9,%xmm10
5624 102,65,15,115,248,2, //pslldq $0x2,%xmm8
5625 102,69,15,235,195, //por %xmm11,%xmm8
5626 102,69,15,111,202, //movdqa %xmm10,%xmm9
5627 102,69,15,98,200, //punpckldq %xmm8,%xmm9
5628 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
5629 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
5630 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
5631 72,173, //lods %ds:(%rsi),%rax
5632 255,224, //jmpq *%rax
5633};
5634
5635CODE const uint8_t sk_store_f32_sse41[] = {
5636 72,173, //lods %ds:(%rsi),%rax
5637 72,139,0, //mov (%rax),%rax
5638 72,137,249, //mov %rdi,%rcx
5639 72,193,225,4, //shl $0x4,%rcx
5640 68,15,40,192, //movaps %xmm0,%xmm8
5641 68,15,40,200, //movaps %xmm0,%xmm9
5642 68,15,20,201, //unpcklps %xmm1,%xmm9
5643 68,15,40,210, //movaps %xmm2,%xmm10
5644 68,15,40,218, //movaps %xmm2,%xmm11
5645 68,15,20,219, //unpcklps %xmm3,%xmm11
5646 68,15,21,193, //unpckhps %xmm1,%xmm8
5647 68,15,21,211, //unpckhps %xmm3,%xmm10
5648 69,15,40,225, //movaps %xmm9,%xmm12
5649 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
5650 102,69,15,21,203, //unpckhpd %xmm11,%xmm9
5651 69,15,40,216, //movaps %xmm8,%xmm11
5652 102,69,15,20,218, //unpcklpd %xmm10,%xmm11
5653 102,69,15,21,194, //unpckhpd %xmm10,%xmm8
5654 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
5655 102,68,15,17,76,8,16, //movupd %xmm9,0x10(%rax,%rcx,1)
5656 102,68,15,17,92,8,32, //movupd %xmm11,0x20(%rax,%rcx,1)
5657 102,68,15,17,68,8,48, //movupd %xmm8,0x30(%rax,%rcx,1)
5658 72,173, //lods %ds:(%rsi),%rax
5659 255,224, //jmpq *%rax
5660};
5661
5662CODE const uint8_t sk_clamp_x_sse41[] = {
5663 72,173, //lods %ds:(%rsi),%rax
5664 69,15,87,192, //xorps %xmm8,%xmm8
5665 68,15,95,192, //maxps %xmm0,%xmm8
5666 243,68,15,16,8, //movss (%rax),%xmm9
5667 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5668 102,15,118,192, //pcmpeqd %xmm0,%xmm0
5669 102,65,15,254,193, //paddd %xmm9,%xmm0
5670 68,15,93,192, //minps %xmm0,%xmm8
5671 72,173, //lods %ds:(%rsi),%rax
5672 65,15,40,192, //movaps %xmm8,%xmm0
5673 255,224, //jmpq *%rax
5674};
5675
5676CODE const uint8_t sk_clamp_y_sse41[] = {
5677 72,173, //lods %ds:(%rsi),%rax
5678 69,15,87,192, //xorps %xmm8,%xmm8
5679 68,15,95,193, //maxps %xmm1,%xmm8
5680 243,68,15,16,8, //movss (%rax),%xmm9
5681 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5682 102,15,118,201, //pcmpeqd %xmm1,%xmm1
5683 102,65,15,254,201, //paddd %xmm9,%xmm1
5684 68,15,93,193, //minps %xmm1,%xmm8
5685 72,173, //lods %ds:(%rsi),%rax
5686 65,15,40,200, //movaps %xmm8,%xmm1
5687 255,224, //jmpq *%rax
5688};
5689
5690CODE const uint8_t sk_repeat_x_sse41[] = {
5691 72,173, //lods %ds:(%rsi),%rax
5692 243,68,15,16,0, //movss (%rax),%xmm8
5693 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5694 68,15,40,200, //movaps %xmm0,%xmm9
5695 69,15,94,200, //divps %xmm8,%xmm9
5696 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
5697 69,15,89,200, //mulps %xmm8,%xmm9
5698 65,15,92,193, //subps %xmm9,%xmm0
5699 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
5700 102,69,15,254,200, //paddd %xmm8,%xmm9
5701 65,15,93,193, //minps %xmm9,%xmm0
5702 72,173, //lods %ds:(%rsi),%rax
5703 255,224, //jmpq *%rax
5704};
5705
5706CODE const uint8_t sk_repeat_y_sse41[] = {
5707 72,173, //lods %ds:(%rsi),%rax
5708 243,68,15,16,0, //movss (%rax),%xmm8
5709 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5710 68,15,40,201, //movaps %xmm1,%xmm9
5711 69,15,94,200, //divps %xmm8,%xmm9
5712 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
5713 69,15,89,200, //mulps %xmm8,%xmm9
5714 65,15,92,201, //subps %xmm9,%xmm1
5715 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
5716 102,69,15,254,200, //paddd %xmm8,%xmm9
5717 65,15,93,201, //minps %xmm9,%xmm1
5718 72,173, //lods %ds:(%rsi),%rax
5719 255,224, //jmpq *%rax
5720};
5721
5722CODE const uint8_t sk_mirror_x_sse41[] = {
5723 72,173, //lods %ds:(%rsi),%rax
5724 243,68,15,16,0, //movss (%rax),%xmm8
5725 69,15,40,200, //movaps %xmm8,%xmm9
5726 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5727 65,15,92,193, //subps %xmm9,%xmm0
5728 243,69,15,88,192, //addss %xmm8,%xmm8
5729 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5730 68,15,40,208, //movaps %xmm0,%xmm10
5731 69,15,94,208, //divps %xmm8,%xmm10
5732 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
5733 69,15,89,208, //mulps %xmm8,%xmm10
5734 65,15,92,194, //subps %xmm10,%xmm0
5735 65,15,92,193, //subps %xmm9,%xmm0
5736 69,15,87,192, //xorps %xmm8,%xmm8
5737 68,15,92,192, //subps %xmm0,%xmm8
5738 65,15,84,192, //andps %xmm8,%xmm0
5739 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
5740 102,69,15,254,193, //paddd %xmm9,%xmm8
5741 65,15,93,192, //minps %xmm8,%xmm0
5742 72,173, //lods %ds:(%rsi),%rax
5743 255,224, //jmpq *%rax
5744};
5745
5746CODE const uint8_t sk_mirror_y_sse41[] = {
5747 72,173, //lods %ds:(%rsi),%rax
5748 243,68,15,16,0, //movss (%rax),%xmm8
5749 69,15,40,200, //movaps %xmm8,%xmm9
5750 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5751 65,15,92,201, //subps %xmm9,%xmm1
5752 243,69,15,88,192, //addss %xmm8,%xmm8
5753 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5754 68,15,40,209, //movaps %xmm1,%xmm10
5755 69,15,94,208, //divps %xmm8,%xmm10
5756 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
5757 69,15,89,208, //mulps %xmm8,%xmm10
5758 65,15,92,202, //subps %xmm10,%xmm1
5759 65,15,92,201, //subps %xmm9,%xmm1
5760 69,15,87,192, //xorps %xmm8,%xmm8
5761 68,15,92,193, //subps %xmm1,%xmm8
5762 65,15,84,200, //andps %xmm8,%xmm1
5763 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
5764 102,69,15,254,193, //paddd %xmm9,%xmm8
5765 65,15,93,200, //minps %xmm8,%xmm1
5766 72,173, //lods %ds:(%rsi),%rax
5767 255,224, //jmpq *%rax
5768};
5769
Mike Kleine9ed07d2017-03-07 12:28:11 -05005770CODE const uint8_t sk_luminance_to_alpha_sse41[] = {
5771 243,15,16,154,136,0,0,0, //movss 0x88(%rdx),%xmm3
5772 243,68,15,16,130,140,0,0,0, //movss 0x8c(%rdx),%xmm8
5773 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5774 15,89,216, //mulps %xmm0,%xmm3
5775 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5776 68,15,89,193, //mulps %xmm1,%xmm8
5777 68,15,88,195, //addps %xmm3,%xmm8
5778 243,15,16,154,144,0,0,0, //movss 0x90(%rdx),%xmm3
5779 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
5780 15,89,218, //mulps %xmm2,%xmm3
5781 65,15,88,216, //addps %xmm8,%xmm3
5782 72,173, //lods %ds:(%rsi),%rax
5783 15,87,192, //xorps %xmm0,%xmm0
5784 15,87,201, //xorps %xmm1,%xmm1
5785 15,87,210, //xorps %xmm2,%xmm2
5786 255,224, //jmpq *%rax
5787};
5788
Mike Klein894d5612017-03-07 07:59:52 -05005789CODE const uint8_t sk_matrix_2x3_sse41[] = {
5790 68,15,40,201, //movaps %xmm1,%xmm9
5791 68,15,40,192, //movaps %xmm0,%xmm8
5792 72,173, //lods %ds:(%rsi),%rax
5793 243,15,16,0, //movss (%rax),%xmm0
5794 243,15,16,72,4, //movss 0x4(%rax),%xmm1
5795 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5796 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
5797 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5798 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
5799 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5800 69,15,89,209, //mulps %xmm9,%xmm10
5801 69,15,88,211, //addps %xmm11,%xmm10
5802 65,15,89,192, //mulps %xmm8,%xmm0
5803 65,15,88,194, //addps %xmm10,%xmm0
5804 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5805 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
5806 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5807 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
5808 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5809 69,15,89,209, //mulps %xmm9,%xmm10
5810 69,15,88,211, //addps %xmm11,%xmm10
5811 65,15,89,200, //mulps %xmm8,%xmm1
5812 65,15,88,202, //addps %xmm10,%xmm1
5813 72,173, //lods %ds:(%rsi),%rax
5814 255,224, //jmpq *%rax
5815};
5816
5817CODE const uint8_t sk_matrix_3x4_sse41[] = {
5818 68,15,40,201, //movaps %xmm1,%xmm9
5819 68,15,40,192, //movaps %xmm0,%xmm8
5820 72,173, //lods %ds:(%rsi),%rax
5821 243,15,16,0, //movss (%rax),%xmm0
5822 243,15,16,72,4, //movss 0x4(%rax),%xmm1
5823 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5824 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
5825 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5826 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
5827 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5828 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
5829 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5830 68,15,89,218, //mulps %xmm2,%xmm11
5831 69,15,88,220, //addps %xmm12,%xmm11
5832 69,15,89,209, //mulps %xmm9,%xmm10
5833 69,15,88,211, //addps %xmm11,%xmm10
5834 65,15,89,192, //mulps %xmm8,%xmm0
5835 65,15,88,194, //addps %xmm10,%xmm0
5836 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5837 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
5838 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5839 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
5840 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5841 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
5842 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5843 68,15,89,218, //mulps %xmm2,%xmm11
5844 69,15,88,220, //addps %xmm12,%xmm11
5845 69,15,89,209, //mulps %xmm9,%xmm10
5846 69,15,88,211, //addps %xmm11,%xmm10
5847 65,15,89,200, //mulps %xmm8,%xmm1
5848 65,15,88,202, //addps %xmm10,%xmm1
5849 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
5850 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5851 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
5852 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5853 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
5854 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5855 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
5856 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5857 68,15,89,226, //mulps %xmm2,%xmm12
5858 69,15,88,229, //addps %xmm13,%xmm12
5859 69,15,89,217, //mulps %xmm9,%xmm11
5860 69,15,88,220, //addps %xmm12,%xmm11
5861 69,15,89,208, //mulps %xmm8,%xmm10
5862 69,15,88,211, //addps %xmm11,%xmm10
5863 72,173, //lods %ds:(%rsi),%rax
5864 65,15,40,210, //movaps %xmm10,%xmm2
5865 255,224, //jmpq *%rax
5866};
5867
Mike Kleine9ed07d2017-03-07 12:28:11 -05005868CODE const uint8_t sk_matrix_4x5_sse41[] = {
5869 68,15,40,201, //movaps %xmm1,%xmm9
5870 68,15,40,192, //movaps %xmm0,%xmm8
5871 72,173, //lods %ds:(%rsi),%rax
5872 243,15,16,0, //movss (%rax),%xmm0
5873 243,15,16,72,4, //movss 0x4(%rax),%xmm1
5874 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5875 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
5876 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5877 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
5878 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5879 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
5880 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5881 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
5882 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5883 68,15,89,227, //mulps %xmm3,%xmm12
5884 69,15,88,229, //addps %xmm13,%xmm12
5885 68,15,89,218, //mulps %xmm2,%xmm11
5886 69,15,88,220, //addps %xmm12,%xmm11
5887 69,15,89,209, //mulps %xmm9,%xmm10
5888 69,15,88,211, //addps %xmm11,%xmm10
5889 65,15,89,192, //mulps %xmm8,%xmm0
5890 65,15,88,194, //addps %xmm10,%xmm0
5891 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5892 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
5893 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5894 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
5895 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5896 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
5897 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5898 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
5899 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5900 68,15,89,227, //mulps %xmm3,%xmm12
5901 69,15,88,229, //addps %xmm13,%xmm12
5902 68,15,89,218, //mulps %xmm2,%xmm11
5903 69,15,88,220, //addps %xmm12,%xmm11
5904 69,15,89,209, //mulps %xmm9,%xmm10
5905 69,15,88,211, //addps %xmm11,%xmm10
5906 65,15,89,200, //mulps %xmm8,%xmm1
5907 65,15,88,202, //addps %xmm10,%xmm1
5908 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
5909 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5910 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
5911 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5912 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
5913 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5914 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
5915 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5916 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
5917 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
5918 68,15,89,235, //mulps %xmm3,%xmm13
5919 69,15,88,238, //addps %xmm14,%xmm13
5920 68,15,89,226, //mulps %xmm2,%xmm12
5921 69,15,88,229, //addps %xmm13,%xmm12
5922 69,15,89,217, //mulps %xmm9,%xmm11
5923 69,15,88,220, //addps %xmm12,%xmm11
5924 69,15,89,208, //mulps %xmm8,%xmm10
5925 69,15,88,211, //addps %xmm11,%xmm10
5926 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
5927 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5928 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
5929 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5930 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
5931 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
5932 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
5933 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
5934 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
5935 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
5936 68,15,89,243, //mulps %xmm3,%xmm14
5937 69,15,88,247, //addps %xmm15,%xmm14
5938 68,15,89,234, //mulps %xmm2,%xmm13
5939 69,15,88,238, //addps %xmm14,%xmm13
5940 69,15,89,225, //mulps %xmm9,%xmm12
5941 69,15,88,229, //addps %xmm13,%xmm12
5942 69,15,89,216, //mulps %xmm8,%xmm11
5943 69,15,88,220, //addps %xmm12,%xmm11
5944 72,173, //lods %ds:(%rsi),%rax
5945 65,15,40,210, //movaps %xmm10,%xmm2
5946 65,15,40,219, //movaps %xmm11,%xmm3
5947 255,224, //jmpq *%rax
5948};
5949
Mike Klein894d5612017-03-07 07:59:52 -05005950CODE const uint8_t sk_matrix_perspective_sse41[] = {
5951 68,15,40,192, //movaps %xmm0,%xmm8
5952 72,173, //lods %ds:(%rsi),%rax
5953 243,15,16,0, //movss (%rax),%xmm0
5954 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
5955 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
5956 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5957 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
5958 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5959 68,15,89,201, //mulps %xmm1,%xmm9
5960 69,15,88,202, //addps %xmm10,%xmm9
5961 65,15,89,192, //mulps %xmm8,%xmm0
5962 65,15,88,193, //addps %xmm9,%xmm0
5963 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
5964 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
5965 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
5966 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5967 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
5968 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5969 68,15,89,209, //mulps %xmm1,%xmm10
5970 69,15,88,211, //addps %xmm11,%xmm10
5971 69,15,89,200, //mulps %xmm8,%xmm9
5972 69,15,88,202, //addps %xmm10,%xmm9
5973 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
5974 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
5975 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
5976 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
5977 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
5978 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
5979 68,15,89,217, //mulps %xmm1,%xmm11
5980 69,15,88,220, //addps %xmm12,%xmm11
5981 69,15,89,208, //mulps %xmm8,%xmm10
5982 69,15,88,211, //addps %xmm11,%xmm10
5983 65,15,83,202, //rcpps %xmm10,%xmm1
5984 15,89,193, //mulps %xmm1,%xmm0
5985 68,15,89,201, //mulps %xmm1,%xmm9
5986 72,173, //lods %ds:(%rsi),%rax
5987 65,15,40,201, //movaps %xmm9,%xmm1
5988 255,224, //jmpq *%rax
5989};
5990
5991CODE const uint8_t sk_linear_gradient_2stops_sse41[] = {
5992 72,173, //lods %ds:(%rsi),%rax
5993 68,15,16,8, //movups (%rax),%xmm9
5994 15,16,88,16, //movups 0x10(%rax),%xmm3
5995 68,15,40,195, //movaps %xmm3,%xmm8
5996 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
5997 65,15,40,201, //movaps %xmm9,%xmm1
5998 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
5999 68,15,89,192, //mulps %xmm0,%xmm8
6000 68,15,88,193, //addps %xmm1,%xmm8
6001 15,40,203, //movaps %xmm3,%xmm1
6002 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
6003 65,15,40,209, //movaps %xmm9,%xmm2
6004 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
6005 15,89,200, //mulps %xmm0,%xmm1
6006 15,88,202, //addps %xmm2,%xmm1
6007 15,40,211, //movaps %xmm3,%xmm2
6008 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
6009 69,15,40,209, //movaps %xmm9,%xmm10
6010 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
6011 15,89,208, //mulps %xmm0,%xmm2
6012 65,15,88,210, //addps %xmm10,%xmm2
6013 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
6014 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
6015 15,89,216, //mulps %xmm0,%xmm3
6016 65,15,88,217, //addps %xmm9,%xmm3
6017 72,173, //lods %ds:(%rsi),%rax
6018 65,15,40,192, //movaps %xmm8,%xmm0
6019 255,224, //jmpq *%rax
6020};
6021
6022CODE const uint8_t sk_start_pipeline_sse2[] = {
6023 65,87, //push %r15
6024 65,86, //push %r14
6025 65,85, //push %r13
6026 65,84, //push %r12
6027 83, //push %rbx
6028 73,137,207, //mov %rcx,%r15
6029 73,137,214, //mov %rdx,%r14
6030 72,137,251, //mov %rdi,%rbx
6031 72,173, //lods %ds:(%rsi),%rax
6032 73,137,196, //mov %rax,%r12
6033 73,137,245, //mov %rsi,%r13
6034 72,141,67,4, //lea 0x4(%rbx),%rax
6035 76,57,248, //cmp %r15,%rax
6036 118,5, //jbe 28 <_sk_start_pipeline_sse2+0x28>
6037 72,137,216, //mov %rbx,%rax
6038 235,52, //jmp 5c <_sk_start_pipeline_sse2+0x5c>
6039 15,87,192, //xorps %xmm0,%xmm0
6040 15,87,201, //xorps %xmm1,%xmm1
6041 15,87,210, //xorps %xmm2,%xmm2
6042 15,87,219, //xorps %xmm3,%xmm3
6043 15,87,228, //xorps %xmm4,%xmm4
6044 15,87,237, //xorps %xmm5,%xmm5
6045 15,87,246, //xorps %xmm6,%xmm6
6046 15,87,255, //xorps %xmm7,%xmm7
6047 72,137,223, //mov %rbx,%rdi
6048 76,137,238, //mov %r13,%rsi
6049 76,137,242, //mov %r14,%rdx
6050 65,255,212, //callq *%r12
6051 72,141,67,4, //lea 0x4(%rbx),%rax
6052 72,131,195,8, //add $0x8,%rbx
6053 76,57,251, //cmp %r15,%rbx
6054 72,137,195, //mov %rax,%rbx
6055 118,204, //jbe 28 <_sk_start_pipeline_sse2+0x28>
6056 91, //pop %rbx
6057 65,92, //pop %r12
6058 65,93, //pop %r13
6059 65,94, //pop %r14
6060 65,95, //pop %r15
6061 195, //retq
6062};
6063
6064CODE const uint8_t sk_just_return_sse2[] = {
6065 195, //retq
6066};
6067
6068CODE const uint8_t sk_seed_shader_sse2[] = {
6069 72,173, //lods %ds:(%rsi),%rax
6070 102,15,110,199, //movd %edi,%xmm0
6071 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6072 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006073 185,0,0,0,63, //mov $0x3f000000,%ecx
6074 102,15,110,209, //movd %ecx,%xmm2
6075 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6076 15,88,202, //addps %xmm2,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05006077 15,16,66,20, //movups 0x14(%rdx),%xmm0
6078 15,88,193, //addps %xmm1,%xmm0
6079 102,15,110,8, //movd (%rax),%xmm1
6080 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
6081 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006082 15,88,202, //addps %xmm2,%xmm1
6083 184,0,0,128,63, //mov $0x3f800000,%eax
6084 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006085 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6086 72,173, //lods %ds:(%rsi),%rax
6087 15,87,219, //xorps %xmm3,%xmm3
6088 15,87,228, //xorps %xmm4,%xmm4
6089 15,87,237, //xorps %xmm5,%xmm5
6090 15,87,246, //xorps %xmm6,%xmm6
6091 15,87,255, //xorps %xmm7,%xmm7
6092 255,224, //jmpq *%rax
6093};
6094
6095CODE const uint8_t sk_constant_color_sse2[] = {
6096 72,173, //lods %ds:(%rsi),%rax
6097 15,16,24, //movups (%rax),%xmm3
6098 15,40,195, //movaps %xmm3,%xmm0
6099 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6100 15,40,203, //movaps %xmm3,%xmm1
6101 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
6102 15,40,211, //movaps %xmm3,%xmm2
6103 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
6104 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
6105 72,173, //lods %ds:(%rsi),%rax
6106 255,224, //jmpq *%rax
6107};
6108
6109CODE const uint8_t sk_clear_sse2[] = {
6110 72,173, //lods %ds:(%rsi),%rax
6111 15,87,192, //xorps %xmm0,%xmm0
6112 15,87,201, //xorps %xmm1,%xmm1
6113 15,87,210, //xorps %xmm2,%xmm2
6114 15,87,219, //xorps %xmm3,%xmm3
6115 255,224, //jmpq *%rax
6116};
6117
6118CODE const uint8_t sk_plus__sse2[] = {
6119 15,88,196, //addps %xmm4,%xmm0
6120 15,88,205, //addps %xmm5,%xmm1
6121 15,88,214, //addps %xmm6,%xmm2
6122 15,88,223, //addps %xmm7,%xmm3
6123 72,173, //lods %ds:(%rsi),%rax
6124 255,224, //jmpq *%rax
6125};
6126
6127CODE const uint8_t sk_srcover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006128 184,0,0,128,63, //mov $0x3f800000,%eax
6129 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006130 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6131 68,15,92,195, //subps %xmm3,%xmm8
6132 69,15,40,200, //movaps %xmm8,%xmm9
6133 68,15,89,204, //mulps %xmm4,%xmm9
6134 65,15,88,193, //addps %xmm9,%xmm0
6135 69,15,40,200, //movaps %xmm8,%xmm9
6136 68,15,89,205, //mulps %xmm5,%xmm9
6137 65,15,88,201, //addps %xmm9,%xmm1
6138 69,15,40,200, //movaps %xmm8,%xmm9
6139 68,15,89,206, //mulps %xmm6,%xmm9
6140 65,15,88,209, //addps %xmm9,%xmm2
6141 68,15,89,199, //mulps %xmm7,%xmm8
6142 65,15,88,216, //addps %xmm8,%xmm3
6143 72,173, //lods %ds:(%rsi),%rax
6144 255,224, //jmpq *%rax
6145};
6146
6147CODE const uint8_t sk_dstover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006148 184,0,0,128,63, //mov $0x3f800000,%eax
6149 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006150 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6151 68,15,92,199, //subps %xmm7,%xmm8
6152 65,15,89,192, //mulps %xmm8,%xmm0
6153 15,88,196, //addps %xmm4,%xmm0
6154 65,15,89,200, //mulps %xmm8,%xmm1
6155 15,88,205, //addps %xmm5,%xmm1
6156 65,15,89,208, //mulps %xmm8,%xmm2
6157 15,88,214, //addps %xmm6,%xmm2
6158 65,15,89,216, //mulps %xmm8,%xmm3
6159 15,88,223, //addps %xmm7,%xmm3
6160 72,173, //lods %ds:(%rsi),%rax
6161 255,224, //jmpq *%rax
6162};
6163
6164CODE const uint8_t sk_clamp_0_sse2[] = {
6165 69,15,87,192, //xorps %xmm8,%xmm8
6166 65,15,95,192, //maxps %xmm8,%xmm0
6167 65,15,95,200, //maxps %xmm8,%xmm1
6168 65,15,95,208, //maxps %xmm8,%xmm2
6169 65,15,95,216, //maxps %xmm8,%xmm3
6170 72,173, //lods %ds:(%rsi),%rax
6171 255,224, //jmpq *%rax
6172};
6173
6174CODE const uint8_t sk_clamp_1_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006175 184,0,0,128,63, //mov $0x3f800000,%eax
6176 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006177 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6178 65,15,93,192, //minps %xmm8,%xmm0
6179 65,15,93,200, //minps %xmm8,%xmm1
6180 65,15,93,208, //minps %xmm8,%xmm2
6181 65,15,93,216, //minps %xmm8,%xmm3
6182 72,173, //lods %ds:(%rsi),%rax
6183 255,224, //jmpq *%rax
6184};
6185
6186CODE const uint8_t sk_clamp_a_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006187 184,0,0,128,63, //mov $0x3f800000,%eax
6188 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006189 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6190 65,15,93,216, //minps %xmm8,%xmm3
6191 15,93,195, //minps %xmm3,%xmm0
6192 15,93,203, //minps %xmm3,%xmm1
6193 15,93,211, //minps %xmm3,%xmm2
6194 72,173, //lods %ds:(%rsi),%rax
6195 255,224, //jmpq *%rax
6196};
6197
6198CODE const uint8_t sk_set_rgb_sse2[] = {
6199 72,173, //lods %ds:(%rsi),%rax
6200 243,15,16,0, //movss (%rax),%xmm0
6201 243,15,16,72,4, //movss 0x4(%rax),%xmm1
6202 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6203 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6204 243,15,16,80,8, //movss 0x8(%rax),%xmm2
6205 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6206 72,173, //lods %ds:(%rsi),%rax
6207 255,224, //jmpq *%rax
6208};
6209
6210CODE const uint8_t sk_swap_rb_sse2[] = {
6211 68,15,40,192, //movaps %xmm0,%xmm8
6212 72,173, //lods %ds:(%rsi),%rax
6213 15,40,194, //movaps %xmm2,%xmm0
6214 65,15,40,208, //movaps %xmm8,%xmm2
6215 255,224, //jmpq *%rax
6216};
6217
6218CODE const uint8_t sk_swap_sse2[] = {
6219 68,15,40,195, //movaps %xmm3,%xmm8
6220 68,15,40,202, //movaps %xmm2,%xmm9
6221 68,15,40,209, //movaps %xmm1,%xmm10
6222 68,15,40,216, //movaps %xmm0,%xmm11
6223 72,173, //lods %ds:(%rsi),%rax
6224 15,40,196, //movaps %xmm4,%xmm0
6225 15,40,205, //movaps %xmm5,%xmm1
6226 15,40,214, //movaps %xmm6,%xmm2
6227 15,40,223, //movaps %xmm7,%xmm3
6228 65,15,40,227, //movaps %xmm11,%xmm4
6229 65,15,40,234, //movaps %xmm10,%xmm5
6230 65,15,40,241, //movaps %xmm9,%xmm6
6231 65,15,40,248, //movaps %xmm8,%xmm7
6232 255,224, //jmpq *%rax
6233};
6234
6235CODE const uint8_t sk_move_src_dst_sse2[] = {
6236 72,173, //lods %ds:(%rsi),%rax
6237 15,40,224, //movaps %xmm0,%xmm4
6238 15,40,233, //movaps %xmm1,%xmm5
6239 15,40,242, //movaps %xmm2,%xmm6
6240 15,40,251, //movaps %xmm3,%xmm7
6241 255,224, //jmpq *%rax
6242};
6243
6244CODE const uint8_t sk_move_dst_src_sse2[] = {
6245 72,173, //lods %ds:(%rsi),%rax
6246 15,40,196, //movaps %xmm4,%xmm0
6247 15,40,205, //movaps %xmm5,%xmm1
6248 15,40,214, //movaps %xmm6,%xmm2
6249 15,40,223, //movaps %xmm7,%xmm3
6250 255,224, //jmpq *%rax
6251};
6252
6253CODE const uint8_t sk_premul_sse2[] = {
6254 15,89,195, //mulps %xmm3,%xmm0
6255 15,89,203, //mulps %xmm3,%xmm1
6256 15,89,211, //mulps %xmm3,%xmm2
6257 72,173, //lods %ds:(%rsi),%rax
6258 255,224, //jmpq *%rax
6259};
6260
6261CODE const uint8_t sk_unpremul_sse2[] = {
6262 69,15,87,192, //xorps %xmm8,%xmm8
6263 68,15,194,195,0, //cmpeqps %xmm3,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006264 184,0,0,128,63, //mov $0x3f800000,%eax
6265 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006266 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6267 68,15,94,203, //divps %xmm3,%xmm9
6268 69,15,85,193, //andnps %xmm9,%xmm8
6269 65,15,89,192, //mulps %xmm8,%xmm0
6270 65,15,89,200, //mulps %xmm8,%xmm1
6271 65,15,89,208, //mulps %xmm8,%xmm2
6272 72,173, //lods %ds:(%rsi),%rax
6273 255,224, //jmpq *%rax
6274};
6275
6276CODE const uint8_t sk_from_srgb_sse2[] = {
6277 243,68,15,16,66,64, //movss 0x40(%rdx),%xmm8
6278 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6279 69,15,40,232, //movaps %xmm8,%xmm13
6280 68,15,89,232, //mulps %xmm0,%xmm13
6281 68,15,40,224, //movaps %xmm0,%xmm12
6282 69,15,89,228, //mulps %xmm12,%xmm12
6283 243,68,15,16,74,60, //movss 0x3c(%rdx),%xmm9
6284 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6285 243,68,15,16,82,52, //movss 0x34(%rdx),%xmm10
6286 243,68,15,16,90,56, //movss 0x38(%rdx),%xmm11
6287 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6288 69,15,40,241, //movaps %xmm9,%xmm14
6289 68,15,89,240, //mulps %xmm0,%xmm14
6290 69,15,88,243, //addps %xmm11,%xmm14
6291 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6292 69,15,89,244, //mulps %xmm12,%xmm14
6293 69,15,88,242, //addps %xmm10,%xmm14
6294 243,68,15,16,98,68, //movss 0x44(%rdx),%xmm12
6295 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6296 65,15,194,196,1, //cmpltps %xmm12,%xmm0
6297 68,15,84,232, //andps %xmm0,%xmm13
6298 65,15,85,198, //andnps %xmm14,%xmm0
6299 65,15,86,197, //orps %xmm13,%xmm0
6300 69,15,40,232, //movaps %xmm8,%xmm13
6301 68,15,89,233, //mulps %xmm1,%xmm13
6302 68,15,40,241, //movaps %xmm1,%xmm14
6303 69,15,89,246, //mulps %xmm14,%xmm14
6304 69,15,40,249, //movaps %xmm9,%xmm15
6305 68,15,89,249, //mulps %xmm1,%xmm15
6306 69,15,88,251, //addps %xmm11,%xmm15
6307 69,15,89,254, //mulps %xmm14,%xmm15
6308 69,15,88,250, //addps %xmm10,%xmm15
6309 65,15,194,204,1, //cmpltps %xmm12,%xmm1
6310 68,15,84,233, //andps %xmm1,%xmm13
6311 65,15,85,207, //andnps %xmm15,%xmm1
6312 65,15,86,205, //orps %xmm13,%xmm1
6313 68,15,89,194, //mulps %xmm2,%xmm8
6314 68,15,40,234, //movaps %xmm2,%xmm13
6315 69,15,89,237, //mulps %xmm13,%xmm13
6316 68,15,89,202, //mulps %xmm2,%xmm9
6317 69,15,88,203, //addps %xmm11,%xmm9
6318 69,15,89,205, //mulps %xmm13,%xmm9
6319 69,15,88,202, //addps %xmm10,%xmm9
6320 65,15,194,212,1, //cmpltps %xmm12,%xmm2
6321 68,15,84,194, //andps %xmm2,%xmm8
6322 65,15,85,209, //andnps %xmm9,%xmm2
6323 65,15,86,208, //orps %xmm8,%xmm2
6324 72,173, //lods %ds:(%rsi),%rax
6325 255,224, //jmpq *%rax
6326};
6327
6328CODE const uint8_t sk_to_srgb_sse2[] = {
6329 72,131,236,40, //sub $0x28,%rsp
6330 15,41,124,36,16, //movaps %xmm7,0x10(%rsp)
6331 15,41,52,36, //movaps %xmm6,(%rsp)
6332 15,40,245, //movaps %xmm5,%xmm6
6333 15,40,236, //movaps %xmm4,%xmm5
6334 15,40,227, //movaps %xmm3,%xmm4
6335 68,15,82,192, //rsqrtps %xmm0,%xmm8
6336 69,15,83,232, //rcpps %xmm8,%xmm13
6337 69,15,82,248, //rsqrtps %xmm8,%xmm15
6338 243,15,16,26, //movss (%rdx),%xmm3
6339 243,68,15,16,66,72, //movss 0x48(%rdx),%xmm8
6340 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6341 69,15,40,240, //movaps %xmm8,%xmm14
6342 68,15,89,240, //mulps %xmm0,%xmm14
6343 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6344 243,68,15,16,82,76, //movss 0x4c(%rdx),%xmm10
6345 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
6346 243,68,15,16,90,80, //movss 0x50(%rdx),%xmm11
6347 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6348 243,68,15,16,98,84, //movss 0x54(%rdx),%xmm12
6349 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6350 69,15,89,235, //mulps %xmm11,%xmm13
6351 69,15,88,236, //addps %xmm12,%xmm13
6352 69,15,89,250, //mulps %xmm10,%xmm15
6353 69,15,88,253, //addps %xmm13,%xmm15
6354 68,15,40,203, //movaps %xmm3,%xmm9
6355 69,15,93,207, //minps %xmm15,%xmm9
6356 243,68,15,16,106,88, //movss 0x58(%rdx),%xmm13
6357 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
6358 65,15,194,197,1, //cmpltps %xmm13,%xmm0
6359 68,15,84,240, //andps %xmm0,%xmm14
6360 65,15,85,193, //andnps %xmm9,%xmm0
6361 65,15,86,198, //orps %xmm14,%xmm0
6362 68,15,82,201, //rsqrtps %xmm1,%xmm9
6363 69,15,83,241, //rcpps %xmm9,%xmm14
6364 69,15,82,201, //rsqrtps %xmm9,%xmm9
6365 69,15,89,243, //mulps %xmm11,%xmm14
6366 69,15,88,244, //addps %xmm12,%xmm14
6367 69,15,89,202, //mulps %xmm10,%xmm9
6368 69,15,88,206, //addps %xmm14,%xmm9
6369 68,15,40,243, //movaps %xmm3,%xmm14
6370 69,15,93,241, //minps %xmm9,%xmm14
6371 69,15,40,200, //movaps %xmm8,%xmm9
6372 68,15,89,201, //mulps %xmm1,%xmm9
6373 65,15,194,205,1, //cmpltps %xmm13,%xmm1
6374 68,15,84,201, //andps %xmm1,%xmm9
6375 65,15,85,206, //andnps %xmm14,%xmm1
6376 65,15,86,201, //orps %xmm9,%xmm1
6377 68,15,82,202, //rsqrtps %xmm2,%xmm9
6378 69,15,83,241, //rcpps %xmm9,%xmm14
6379 69,15,89,243, //mulps %xmm11,%xmm14
6380 69,15,88,244, //addps %xmm12,%xmm14
6381 65,15,82,249, //rsqrtps %xmm9,%xmm7
6382 65,15,89,250, //mulps %xmm10,%xmm7
6383 65,15,88,254, //addps %xmm14,%xmm7
6384 15,93,223, //minps %xmm7,%xmm3
6385 68,15,89,194, //mulps %xmm2,%xmm8
6386 65,15,194,213,1, //cmpltps %xmm13,%xmm2
6387 68,15,84,194, //andps %xmm2,%xmm8
6388 15,85,211, //andnps %xmm3,%xmm2
6389 65,15,86,208, //orps %xmm8,%xmm2
6390 72,173, //lods %ds:(%rsi),%rax
6391 15,40,220, //movaps %xmm4,%xmm3
6392 15,40,229, //movaps %xmm5,%xmm4
6393 15,40,238, //movaps %xmm6,%xmm5
6394 15,40,52,36, //movaps (%rsp),%xmm6
6395 15,40,124,36,16, //movaps 0x10(%rsp),%xmm7
6396 72,131,196,40, //add $0x28,%rsp
6397 255,224, //jmpq *%rax
6398};
6399
6400CODE const uint8_t sk_scale_1_float_sse2[] = {
6401 72,173, //lods %ds:(%rsi),%rax
6402 243,68,15,16,0, //movss (%rax),%xmm8
6403 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6404 65,15,89,192, //mulps %xmm8,%xmm0
6405 65,15,89,200, //mulps %xmm8,%xmm1
6406 65,15,89,208, //mulps %xmm8,%xmm2
6407 65,15,89,216, //mulps %xmm8,%xmm3
6408 72,173, //lods %ds:(%rsi),%rax
6409 255,224, //jmpq *%rax
6410};
6411
6412CODE const uint8_t sk_scale_u8_sse2[] = {
6413 72,173, //lods %ds:(%rsi),%rax
6414 72,139,0, //mov (%rax),%rax
6415 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
6416 102,69,15,239,201, //pxor %xmm9,%xmm9
6417 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
6418 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
6419 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006420 184,129,128,128,59, //mov $0x3b808081,%eax
6421 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006422 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6423 69,15,89,200, //mulps %xmm8,%xmm9
6424 65,15,89,193, //mulps %xmm9,%xmm0
6425 65,15,89,201, //mulps %xmm9,%xmm1
6426 65,15,89,209, //mulps %xmm9,%xmm2
6427 65,15,89,217, //mulps %xmm9,%xmm3
6428 72,173, //lods %ds:(%rsi),%rax
6429 255,224, //jmpq *%rax
6430};
6431
6432CODE const uint8_t sk_lerp_1_float_sse2[] = {
6433 72,173, //lods %ds:(%rsi),%rax
6434 243,68,15,16,0, //movss (%rax),%xmm8
6435 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6436 15,92,196, //subps %xmm4,%xmm0
6437 65,15,89,192, //mulps %xmm8,%xmm0
6438 15,88,196, //addps %xmm4,%xmm0
6439 15,92,205, //subps %xmm5,%xmm1
6440 65,15,89,200, //mulps %xmm8,%xmm1
6441 15,88,205, //addps %xmm5,%xmm1
6442 15,92,214, //subps %xmm6,%xmm2
6443 65,15,89,208, //mulps %xmm8,%xmm2
6444 15,88,214, //addps %xmm6,%xmm2
6445 15,92,223, //subps %xmm7,%xmm3
6446 65,15,89,216, //mulps %xmm8,%xmm3
6447 15,88,223, //addps %xmm7,%xmm3
6448 72,173, //lods %ds:(%rsi),%rax
6449 255,224, //jmpq *%rax
6450};
6451
6452CODE const uint8_t sk_lerp_u8_sse2[] = {
6453 72,173, //lods %ds:(%rsi),%rax
6454 72,139,0, //mov (%rax),%rax
6455 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
6456 102,69,15,239,201, //pxor %xmm9,%xmm9
6457 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
6458 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
6459 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006460 184,129,128,128,59, //mov $0x3b808081,%eax
6461 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006462 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6463 69,15,89,200, //mulps %xmm8,%xmm9
6464 15,92,196, //subps %xmm4,%xmm0
6465 65,15,89,193, //mulps %xmm9,%xmm0
6466 15,88,196, //addps %xmm4,%xmm0
6467 15,92,205, //subps %xmm5,%xmm1
6468 65,15,89,201, //mulps %xmm9,%xmm1
6469 15,88,205, //addps %xmm5,%xmm1
6470 15,92,214, //subps %xmm6,%xmm2
6471 65,15,89,209, //mulps %xmm9,%xmm2
6472 15,88,214, //addps %xmm6,%xmm2
6473 15,92,223, //subps %xmm7,%xmm3
6474 65,15,89,217, //mulps %xmm9,%xmm3
6475 15,88,223, //addps %xmm7,%xmm3
6476 72,173, //lods %ds:(%rsi),%rax
6477 255,224, //jmpq *%rax
6478};
6479
6480CODE const uint8_t sk_lerp_565_sse2[] = {
6481 72,173, //lods %ds:(%rsi),%rax
6482 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006483 243,68,15,126,12,120, //movq (%rax,%rdi,2),%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006484 102,15,239,219, //pxor %xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006485 102,68,15,97,203, //punpcklwd %xmm3,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -05006486 102,15,110,90,104, //movd 0x68(%rdx),%xmm3
6487 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006488 102,65,15,219,217, //pand %xmm9,%xmm3
6489 68,15,91,211, //cvtdq2ps %xmm3,%xmm10
6490 243,68,15,16,90,116, //movss 0x74(%rdx),%xmm11
6491 243,68,15,16,66,120, //movss 0x78(%rdx),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006492 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006493 69,15,89,218, //mulps %xmm10,%xmm11
6494 102,15,110,90,108, //movd 0x6c(%rdx),%xmm3
6495 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
6496 102,65,15,219,217, //pand %xmm9,%xmm3
6497 15,91,219, //cvtdq2ps %xmm3,%xmm3
6498 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6499 68,15,89,195, //mulps %xmm3,%xmm8
6500 102,15,110,90,112, //movd 0x70(%rdx),%xmm3
6501 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
6502 102,65,15,219,217, //pand %xmm9,%xmm3
6503 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
6504 243,15,16,90,124, //movss 0x7c(%rdx),%xmm3
6505 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6506 65,15,89,217, //mulps %xmm9,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006507 15,92,196, //subps %xmm4,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006508 65,15,89,195, //mulps %xmm11,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006509 15,88,196, //addps %xmm4,%xmm0
6510 15,92,205, //subps %xmm5,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006511 65,15,89,200, //mulps %xmm8,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -05006512 15,88,205, //addps %xmm5,%xmm1
6513 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006514 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -05006515 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006516 184,0,0,128,63, //mov $0x3f800000,%eax
6517 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -05006518 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6519 72,173, //lods %ds:(%rsi),%rax
6520 255,224, //jmpq *%rax
6521};
6522
6523CODE const uint8_t sk_load_tables_sse2[] = {
6524 72,173, //lods %ds:(%rsi),%rax
6525 72,139,8, //mov (%rax),%rcx
6526 76,139,64,8, //mov 0x8(%rax),%r8
6527 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
6528 102,15,110,66,16, //movd 0x10(%rdx),%xmm0
6529 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6530 102,69,15,111,200, //movdqa %xmm8,%xmm9
6531 102,65,15,114,209,8, //psrld $0x8,%xmm9
6532 102,68,15,219,200, //pand %xmm0,%xmm9
6533 102,69,15,111,208, //movdqa %xmm8,%xmm10
6534 102,65,15,114,210,16, //psrld $0x10,%xmm10
6535 102,68,15,219,208, //pand %xmm0,%xmm10
6536 102,65,15,219,192, //pand %xmm8,%xmm0
6537 102,15,112,216,78, //pshufd $0x4e,%xmm0,%xmm3
6538 102,72,15,126,217, //movq %xmm3,%rcx
6539 65,137,201, //mov %ecx,%r9d
6540 72,193,233,32, //shr $0x20,%rcx
6541 102,73,15,126,194, //movq %xmm0,%r10
6542 69,137,211, //mov %r10d,%r11d
6543 73,193,234,32, //shr $0x20,%r10
6544 243,67,15,16,28,144, //movss (%r8,%r10,4),%xmm3
6545 243,65,15,16,4,136, //movss (%r8,%rcx,4),%xmm0
6546 15,20,216, //unpcklps %xmm0,%xmm3
6547 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
6548 243,67,15,16,12,136, //movss (%r8,%r9,4),%xmm1
6549 15,20,193, //unpcklps %xmm1,%xmm0
6550 15,20,195, //unpcklps %xmm3,%xmm0
6551 72,139,72,16, //mov 0x10(%rax),%rcx
6552 102,65,15,112,201,78, //pshufd $0x4e,%xmm9,%xmm1
6553 102,73,15,126,200, //movq %xmm1,%r8
6554 69,137,193, //mov %r8d,%r9d
6555 73,193,232,32, //shr $0x20,%r8
6556 102,77,15,126,202, //movq %xmm9,%r10
6557 69,137,211, //mov %r10d,%r11d
6558 73,193,234,32, //shr $0x20,%r10
6559 243,66,15,16,28,145, //movss (%rcx,%r10,4),%xmm3
6560 243,66,15,16,12,129, //movss (%rcx,%r8,4),%xmm1
6561 15,20,217, //unpcklps %xmm1,%xmm3
6562 243,66,15,16,12,153, //movss (%rcx,%r11,4),%xmm1
6563 243,66,15,16,20,137, //movss (%rcx,%r9,4),%xmm2
6564 15,20,202, //unpcklps %xmm2,%xmm1
6565 15,20,203, //unpcklps %xmm3,%xmm1
6566 72,139,64,24, //mov 0x18(%rax),%rax
6567 102,65,15,112,210,78, //pshufd $0x4e,%xmm10,%xmm2
6568 102,72,15,126,209, //movq %xmm2,%rcx
6569 65,137,200, //mov %ecx,%r8d
6570 72,193,233,32, //shr $0x20,%rcx
6571 102,77,15,126,209, //movq %xmm10,%r9
6572 69,137,202, //mov %r9d,%r10d
6573 73,193,233,32, //shr $0x20,%r9
6574 243,70,15,16,12,136, //movss (%rax,%r9,4),%xmm9
6575 243,15,16,20,136, //movss (%rax,%rcx,4),%xmm2
6576 68,15,20,202, //unpcklps %xmm2,%xmm9
6577 243,66,15,16,20,144, //movss (%rax,%r10,4),%xmm2
6578 243,66,15,16,28,128, //movss (%rax,%r8,4),%xmm3
6579 15,20,211, //unpcklps %xmm3,%xmm2
6580 65,15,20,209, //unpcklps %xmm9,%xmm2
6581 102,65,15,114,208,24, //psrld $0x18,%xmm8
6582 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
6583 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
6584 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6585 65,15,89,216, //mulps %xmm8,%xmm3
6586 72,173, //lods %ds:(%rsi),%rax
6587 255,224, //jmpq *%rax
6588};
6589
6590CODE const uint8_t sk_load_a8_sse2[] = {
6591 72,173, //lods %ds:(%rsi),%rax
6592 72,139,0, //mov (%rax),%rax
6593 102,15,110,4,56, //movd (%rax,%rdi,1),%xmm0
6594 102,15,239,201, //pxor %xmm1,%xmm1
6595 102,15,96,193, //punpcklbw %xmm1,%xmm0
6596 102,15,97,193, //punpcklwd %xmm1,%xmm0
6597 15,91,192, //cvtdq2ps %xmm0,%xmm0
6598 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
6599 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6600 15,89,216, //mulps %xmm0,%xmm3
6601 72,173, //lods %ds:(%rsi),%rax
6602 15,87,192, //xorps %xmm0,%xmm0
6603 102,15,239,201, //pxor %xmm1,%xmm1
6604 15,87,210, //xorps %xmm2,%xmm2
6605 255,224, //jmpq *%rax
6606};
6607
6608CODE const uint8_t sk_store_a8_sse2[] = {
6609 72,173, //lods %ds:(%rsi),%rax
6610 72,139,0, //mov (%rax),%rax
6611 243,68,15,16,66,8, //movss 0x8(%rdx),%xmm8
6612 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6613 68,15,89,195, //mulps %xmm3,%xmm8
6614 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
6615 102,65,15,114,240,16, //pslld $0x10,%xmm8
6616 102,65,15,114,224,16, //psrad $0x10,%xmm8
6617 102,69,15,107,192, //packssdw %xmm8,%xmm8
6618 102,69,15,103,192, //packuswb %xmm8,%xmm8
6619 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
6620 72,173, //lods %ds:(%rsi),%rax
6621 255,224, //jmpq *%rax
6622};
6623
6624CODE const uint8_t sk_load_565_sse2[] = {
6625 72,173, //lods %ds:(%rsi),%rax
6626 72,139,0, //mov (%rax),%rax
6627 243,68,15,126,12,120, //movq (%rax,%rdi,2),%xmm9
6628 102,15,239,192, //pxor %xmm0,%xmm0
6629 102,68,15,97,200, //punpcklwd %xmm0,%xmm9
6630 102,15,110,66,104, //movd 0x68(%rdx),%xmm0
6631 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6632 102,65,15,219,193, //pand %xmm9,%xmm0
6633 15,91,200, //cvtdq2ps %xmm0,%xmm1
6634 243,15,16,26, //movss (%rdx),%xmm3
6635 243,15,16,66,116, //movss 0x74(%rdx),%xmm0
6636 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6637 15,89,193, //mulps %xmm1,%xmm0
6638 102,15,110,74,108, //movd 0x6c(%rdx),%xmm1
6639 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
6640 102,65,15,219,201, //pand %xmm9,%xmm1
6641 68,15,91,193, //cvtdq2ps %xmm1,%xmm8
6642 243,15,16,74,120, //movss 0x78(%rdx),%xmm1
6643 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
6644 65,15,89,200, //mulps %xmm8,%xmm1
6645 102,15,110,82,112, //movd 0x70(%rdx),%xmm2
6646 102,15,112,210,0, //pshufd $0x0,%xmm2,%xmm2
6647 102,65,15,219,209, //pand %xmm9,%xmm2
6648 68,15,91,194, //cvtdq2ps %xmm2,%xmm8
6649 243,15,16,82,124, //movss 0x7c(%rdx),%xmm2
6650 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
6651 65,15,89,208, //mulps %xmm8,%xmm2
6652 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6653 72,173, //lods %ds:(%rsi),%rax
6654 255,224, //jmpq *%rax
6655};
6656
6657CODE const uint8_t sk_store_565_sse2[] = {
6658 72,173, //lods %ds:(%rsi),%rax
6659 72,139,0, //mov (%rax),%rax
6660 243,68,15,16,130,128,0,0,0, //movss 0x80(%rdx),%xmm8
6661 243,68,15,16,138,132,0,0,0, //movss 0x84(%rdx),%xmm9
6662 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6663 69,15,40,208, //movaps %xmm8,%xmm10
6664 68,15,89,208, //mulps %xmm0,%xmm10
6665 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
6666 102,65,15,114,242,11, //pslld $0xb,%xmm10
6667 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6668 68,15,89,201, //mulps %xmm1,%xmm9
6669 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
6670 102,65,15,114,241,5, //pslld $0x5,%xmm9
6671 102,69,15,235,202, //por %xmm10,%xmm9
6672 68,15,89,194, //mulps %xmm2,%xmm8
6673 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
6674 102,69,15,86,193, //orpd %xmm9,%xmm8
6675 102,65,15,114,240,16, //pslld $0x10,%xmm8
6676 102,65,15,114,224,16, //psrad $0x10,%xmm8
6677 102,69,15,107,192, //packssdw %xmm8,%xmm8
6678 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
6679 72,173, //lods %ds:(%rsi),%rax
6680 255,224, //jmpq *%rax
6681};
6682
6683CODE const uint8_t sk_load_8888_sse2[] = {
6684 72,173, //lods %ds:(%rsi),%rax
6685 72,139,0, //mov (%rax),%rax
6686 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006687 184,255,0,0,0, //mov $0xff,%eax
6688 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -05006689 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
6690 102,15,111,203, //movdqa %xmm3,%xmm1
6691 102,15,114,209,8, //psrld $0x8,%xmm1
6692 102,15,219,200, //pand %xmm0,%xmm1
6693 102,15,111,211, //movdqa %xmm3,%xmm2
6694 102,15,114,210,16, //psrld $0x10,%xmm2
6695 102,15,219,208, //pand %xmm0,%xmm2
6696 102,15,219,195, //pand %xmm3,%xmm0
6697 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006698 184,129,128,128,59, //mov $0x3b808081,%eax
6699 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006700 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6701 65,15,89,192, //mulps %xmm8,%xmm0
6702 15,91,201, //cvtdq2ps %xmm1,%xmm1
6703 65,15,89,200, //mulps %xmm8,%xmm1
6704 15,91,210, //cvtdq2ps %xmm2,%xmm2
6705 65,15,89,208, //mulps %xmm8,%xmm2
6706 102,15,114,211,24, //psrld $0x18,%xmm3
6707 15,91,219, //cvtdq2ps %xmm3,%xmm3
6708 65,15,89,216, //mulps %xmm8,%xmm3
6709 72,173, //lods %ds:(%rsi),%rax
6710 255,224, //jmpq *%rax
6711};
6712
6713CODE const uint8_t sk_store_8888_sse2[] = {
6714 72,173, //lods %ds:(%rsi),%rax
6715 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006716 185,0,0,127,67, //mov $0x437f0000,%ecx
6717 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -05006718 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6719 69,15,40,200, //movaps %xmm8,%xmm9
6720 68,15,89,200, //mulps %xmm0,%xmm9
6721 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
6722 69,15,40,208, //movaps %xmm8,%xmm10
6723 68,15,89,209, //mulps %xmm1,%xmm10
6724 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
6725 102,65,15,114,242,8, //pslld $0x8,%xmm10
6726 102,69,15,235,209, //por %xmm9,%xmm10
6727 69,15,40,200, //movaps %xmm8,%xmm9
6728 68,15,89,202, //mulps %xmm2,%xmm9
6729 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
6730 102,65,15,114,241,16, //pslld $0x10,%xmm9
6731 68,15,89,195, //mulps %xmm3,%xmm8
6732 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
6733 102,65,15,114,240,24, //pslld $0x18,%xmm8
6734 102,69,15,235,193, //por %xmm9,%xmm8
6735 102,69,15,235,194, //por %xmm10,%xmm8
6736 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
6737 72,173, //lods %ds:(%rsi),%rax
6738 255,224, //jmpq *%rax
6739};
6740
6741CODE const uint8_t sk_load_f16_sse2[] = {
6742 72,173, //lods %ds:(%rsi),%rax
6743 72,139,0, //mov (%rax),%rax
6744 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
6745 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
6746 102,15,111,208, //movdqa %xmm0,%xmm2
6747 102,15,97,209, //punpcklwd %xmm1,%xmm2
6748 102,15,105,193, //punpckhwd %xmm1,%xmm0
6749 102,68,15,111,194, //movdqa %xmm2,%xmm8
6750 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
6751 102,15,105,208, //punpckhwd %xmm0,%xmm2
6752 102,15,110,66,100, //movd 0x64(%rdx),%xmm0
6753 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
6754 102,15,111,203, //movdqa %xmm3,%xmm1
6755 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
6756 102,65,15,223,200, //pandn %xmm8,%xmm1
6757 102,15,101,218, //pcmpgtw %xmm2,%xmm3
6758 102,15,223,218, //pandn %xmm2,%xmm3
6759 102,69,15,239,192, //pxor %xmm8,%xmm8
6760 102,15,111,193, //movdqa %xmm1,%xmm0
6761 102,65,15,97,192, //punpcklwd %xmm8,%xmm0
6762 102,15,114,240,13, //pslld $0xd,%xmm0
6763 102,15,110,82,92, //movd 0x5c(%rdx),%xmm2
6764 102,68,15,112,202,0, //pshufd $0x0,%xmm2,%xmm9
6765 65,15,89,193, //mulps %xmm9,%xmm0
6766 102,65,15,105,200, //punpckhwd %xmm8,%xmm1
6767 102,15,114,241,13, //pslld $0xd,%xmm1
6768 65,15,89,201, //mulps %xmm9,%xmm1
6769 102,15,111,211, //movdqa %xmm3,%xmm2
6770 102,65,15,97,208, //punpcklwd %xmm8,%xmm2
6771 102,15,114,242,13, //pslld $0xd,%xmm2
6772 65,15,89,209, //mulps %xmm9,%xmm2
6773 102,65,15,105,216, //punpckhwd %xmm8,%xmm3
6774 102,15,114,243,13, //pslld $0xd,%xmm3
6775 65,15,89,217, //mulps %xmm9,%xmm3
6776 72,173, //lods %ds:(%rsi),%rax
6777 255,224, //jmpq *%rax
6778};
6779
6780CODE const uint8_t sk_store_f16_sse2[] = {
6781 72,173, //lods %ds:(%rsi),%rax
6782 72,139,0, //mov (%rax),%rax
6783 102,68,15,110,66,96, //movd 0x60(%rdx),%xmm8
6784 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
6785 102,69,15,111,200, //movdqa %xmm8,%xmm9
6786 68,15,89,200, //mulps %xmm0,%xmm9
6787 102,65,15,114,209,13, //psrld $0xd,%xmm9
6788 102,69,15,111,208, //movdqa %xmm8,%xmm10
6789 68,15,89,209, //mulps %xmm1,%xmm10
6790 102,65,15,114,210,13, //psrld $0xd,%xmm10
6791 102,69,15,111,216, //movdqa %xmm8,%xmm11
6792 68,15,89,218, //mulps %xmm2,%xmm11
6793 102,65,15,114,211,13, //psrld $0xd,%xmm11
6794 68,15,89,195, //mulps %xmm3,%xmm8
6795 102,65,15,114,208,13, //psrld $0xd,%xmm8
6796 102,65,15,115,250,2, //pslldq $0x2,%xmm10
6797 102,69,15,235,209, //por %xmm9,%xmm10
6798 102,65,15,115,248,2, //pslldq $0x2,%xmm8
6799 102,69,15,235,195, //por %xmm11,%xmm8
6800 102,69,15,111,202, //movdqa %xmm10,%xmm9
6801 102,69,15,98,200, //punpckldq %xmm8,%xmm9
6802 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
6803 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
6804 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
6805 72,173, //lods %ds:(%rsi),%rax
6806 255,224, //jmpq *%rax
6807};
6808
6809CODE const uint8_t sk_store_f32_sse2[] = {
6810 72,173, //lods %ds:(%rsi),%rax
6811 72,139,0, //mov (%rax),%rax
6812 72,137,249, //mov %rdi,%rcx
6813 72,193,225,4, //shl $0x4,%rcx
6814 68,15,40,192, //movaps %xmm0,%xmm8
6815 68,15,40,200, //movaps %xmm0,%xmm9
6816 68,15,20,201, //unpcklps %xmm1,%xmm9
6817 68,15,40,210, //movaps %xmm2,%xmm10
6818 68,15,40,218, //movaps %xmm2,%xmm11
6819 68,15,20,219, //unpcklps %xmm3,%xmm11
6820 68,15,21,193, //unpckhps %xmm1,%xmm8
6821 68,15,21,211, //unpckhps %xmm3,%xmm10
6822 69,15,40,225, //movaps %xmm9,%xmm12
6823 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
6824 102,69,15,21,203, //unpckhpd %xmm11,%xmm9
6825 69,15,40,216, //movaps %xmm8,%xmm11
6826 102,69,15,20,218, //unpcklpd %xmm10,%xmm11
6827 102,69,15,21,194, //unpckhpd %xmm10,%xmm8
6828 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
6829 102,68,15,17,76,8,16, //movupd %xmm9,0x10(%rax,%rcx,1)
6830 102,68,15,17,92,8,32, //movupd %xmm11,0x20(%rax,%rcx,1)
6831 102,68,15,17,68,8,48, //movupd %xmm8,0x30(%rax,%rcx,1)
6832 72,173, //lods %ds:(%rsi),%rax
6833 255,224, //jmpq *%rax
6834};
6835
6836CODE const uint8_t sk_clamp_x_sse2[] = {
6837 72,173, //lods %ds:(%rsi),%rax
6838 69,15,87,192, //xorps %xmm8,%xmm8
6839 68,15,95,192, //maxps %xmm0,%xmm8
6840 243,68,15,16,8, //movss (%rax),%xmm9
6841 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6842 102,15,118,192, //pcmpeqd %xmm0,%xmm0
6843 102,65,15,254,193, //paddd %xmm9,%xmm0
6844 68,15,93,192, //minps %xmm0,%xmm8
6845 72,173, //lods %ds:(%rsi),%rax
6846 65,15,40,192, //movaps %xmm8,%xmm0
6847 255,224, //jmpq *%rax
6848};
6849
6850CODE const uint8_t sk_clamp_y_sse2[] = {
6851 72,173, //lods %ds:(%rsi),%rax
6852 69,15,87,192, //xorps %xmm8,%xmm8
6853 68,15,95,193, //maxps %xmm1,%xmm8
6854 243,68,15,16,8, //movss (%rax),%xmm9
6855 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6856 102,15,118,201, //pcmpeqd %xmm1,%xmm1
6857 102,65,15,254,201, //paddd %xmm9,%xmm1
6858 68,15,93,193, //minps %xmm1,%xmm8
6859 72,173, //lods %ds:(%rsi),%rax
6860 65,15,40,200, //movaps %xmm8,%xmm1
6861 255,224, //jmpq *%rax
6862};
6863
6864CODE const uint8_t sk_repeat_x_sse2[] = {
6865 72,173, //lods %ds:(%rsi),%rax
6866 243,68,15,16,0, //movss (%rax),%xmm8
6867 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6868 68,15,40,200, //movaps %xmm0,%xmm9
6869 69,15,94,200, //divps %xmm8,%xmm9
6870 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
6871 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
6872 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006873 184,0,0,128,63, //mov $0x3f800000,%eax
6874 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05006875 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6876 69,15,84,217, //andps %xmm9,%xmm11
6877 69,15,92,211, //subps %xmm11,%xmm10
6878 69,15,89,208, //mulps %xmm8,%xmm10
6879 65,15,92,194, //subps %xmm10,%xmm0
6880 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
6881 102,69,15,254,200, //paddd %xmm8,%xmm9
6882 65,15,93,193, //minps %xmm9,%xmm0
6883 72,173, //lods %ds:(%rsi),%rax
6884 255,224, //jmpq *%rax
6885};
6886
6887CODE const uint8_t sk_repeat_y_sse2[] = {
6888 72,173, //lods %ds:(%rsi),%rax
6889 243,68,15,16,0, //movss (%rax),%xmm8
6890 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6891 68,15,40,201, //movaps %xmm1,%xmm9
6892 69,15,94,200, //divps %xmm8,%xmm9
6893 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
6894 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
6895 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006896 184,0,0,128,63, //mov $0x3f800000,%eax
6897 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -05006898 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
6899 69,15,84,217, //andps %xmm9,%xmm11
6900 69,15,92,211, //subps %xmm11,%xmm10
6901 69,15,89,208, //mulps %xmm8,%xmm10
6902 65,15,92,202, //subps %xmm10,%xmm1
6903 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
6904 102,69,15,254,200, //paddd %xmm8,%xmm9
6905 65,15,93,201, //minps %xmm9,%xmm1
6906 72,173, //lods %ds:(%rsi),%rax
6907 255,224, //jmpq *%rax
6908};
6909
6910CODE const uint8_t sk_mirror_x_sse2[] = {
6911 72,173, //lods %ds:(%rsi),%rax
6912 243,68,15,16,8, //movss (%rax),%xmm9
6913 69,15,40,193, //movaps %xmm9,%xmm8
6914 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6915 65,15,92,192, //subps %xmm8,%xmm0
6916 243,69,15,88,201, //addss %xmm9,%xmm9
6917 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6918 68,15,40,208, //movaps %xmm0,%xmm10
6919 69,15,94,209, //divps %xmm9,%xmm10
6920 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
6921 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
6922 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006923 184,0,0,128,63, //mov $0x3f800000,%eax
6924 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05006925 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6926 69,15,84,226, //andps %xmm10,%xmm12
6927 69,15,87,210, //xorps %xmm10,%xmm10
6928 69,15,92,220, //subps %xmm12,%xmm11
6929 69,15,89,217, //mulps %xmm9,%xmm11
6930 65,15,92,195, //subps %xmm11,%xmm0
6931 65,15,92,192, //subps %xmm8,%xmm0
6932 68,15,92,208, //subps %xmm0,%xmm10
6933 65,15,84,194, //andps %xmm10,%xmm0
6934 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
6935 102,69,15,254,200, //paddd %xmm8,%xmm9
6936 65,15,93,193, //minps %xmm9,%xmm0
6937 72,173, //lods %ds:(%rsi),%rax
6938 255,224, //jmpq *%rax
6939};
6940
6941CODE const uint8_t sk_mirror_y_sse2[] = {
6942 72,173, //lods %ds:(%rsi),%rax
6943 243,68,15,16,8, //movss (%rax),%xmm9
6944 69,15,40,193, //movaps %xmm9,%xmm8
6945 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6946 65,15,92,200, //subps %xmm8,%xmm1
6947 243,69,15,88,201, //addss %xmm9,%xmm9
6948 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
6949 68,15,40,209, //movaps %xmm1,%xmm10
6950 69,15,94,209, //divps %xmm9,%xmm10
6951 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
6952 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
6953 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05006954 184,0,0,128,63, //mov $0x3f800000,%eax
6955 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -05006956 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
6957 69,15,84,226, //andps %xmm10,%xmm12
6958 69,15,87,210, //xorps %xmm10,%xmm10
6959 69,15,92,220, //subps %xmm12,%xmm11
6960 69,15,89,217, //mulps %xmm9,%xmm11
6961 65,15,92,203, //subps %xmm11,%xmm1
6962 65,15,92,200, //subps %xmm8,%xmm1
6963 68,15,92,209, //subps %xmm1,%xmm10
6964 65,15,84,202, //andps %xmm10,%xmm1
6965 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
6966 102,69,15,254,200, //paddd %xmm8,%xmm9
6967 65,15,93,201, //minps %xmm9,%xmm1
6968 72,173, //lods %ds:(%rsi),%rax
6969 255,224, //jmpq *%rax
6970};
6971
Mike Kleine9ed07d2017-03-07 12:28:11 -05006972CODE const uint8_t sk_luminance_to_alpha_sse2[] = {
6973 243,15,16,154,136,0,0,0, //movss 0x88(%rdx),%xmm3
6974 243,68,15,16,130,140,0,0,0, //movss 0x8c(%rdx),%xmm8
6975 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6976 15,89,216, //mulps %xmm0,%xmm3
6977 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
6978 68,15,89,193, //mulps %xmm1,%xmm8
6979 68,15,88,195, //addps %xmm3,%xmm8
6980 243,15,16,154,144,0,0,0, //movss 0x90(%rdx),%xmm3
6981 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
6982 15,89,218, //mulps %xmm2,%xmm3
6983 65,15,88,216, //addps %xmm8,%xmm3
6984 72,173, //lods %ds:(%rsi),%rax
6985 15,87,192, //xorps %xmm0,%xmm0
6986 15,87,201, //xorps %xmm1,%xmm1
6987 15,87,210, //xorps %xmm2,%xmm2
6988 255,224, //jmpq *%rax
6989};
6990
Mike Klein894d5612017-03-07 07:59:52 -05006991CODE const uint8_t sk_matrix_2x3_sse2[] = {
6992 68,15,40,201, //movaps %xmm1,%xmm9
6993 68,15,40,192, //movaps %xmm0,%xmm8
6994 72,173, //lods %ds:(%rsi),%rax
6995 243,15,16,0, //movss (%rax),%xmm0
6996 243,15,16,72,4, //movss 0x4(%rax),%xmm1
6997 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
6998 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
6999 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7000 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
7001 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7002 69,15,89,209, //mulps %xmm9,%xmm10
7003 69,15,88,211, //addps %xmm11,%xmm10
7004 65,15,89,192, //mulps %xmm8,%xmm0
7005 65,15,88,194, //addps %xmm10,%xmm0
7006 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7007 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
7008 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7009 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
7010 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7011 69,15,89,209, //mulps %xmm9,%xmm10
7012 69,15,88,211, //addps %xmm11,%xmm10
7013 65,15,89,200, //mulps %xmm8,%xmm1
7014 65,15,88,202, //addps %xmm10,%xmm1
7015 72,173, //lods %ds:(%rsi),%rax
7016 255,224, //jmpq *%rax
7017};
7018
7019CODE const uint8_t sk_matrix_3x4_sse2[] = {
7020 68,15,40,201, //movaps %xmm1,%xmm9
7021 68,15,40,192, //movaps %xmm0,%xmm8
7022 72,173, //lods %ds:(%rsi),%rax
7023 243,15,16,0, //movss (%rax),%xmm0
7024 243,15,16,72,4, //movss 0x4(%rax),%xmm1
7025 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7026 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
7027 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7028 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
7029 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7030 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
7031 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7032 68,15,89,218, //mulps %xmm2,%xmm11
7033 69,15,88,220, //addps %xmm12,%xmm11
7034 69,15,89,209, //mulps %xmm9,%xmm10
7035 69,15,88,211, //addps %xmm11,%xmm10
7036 65,15,89,192, //mulps %xmm8,%xmm0
7037 65,15,88,194, //addps %xmm10,%xmm0
7038 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7039 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
7040 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7041 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
7042 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7043 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
7044 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7045 68,15,89,218, //mulps %xmm2,%xmm11
7046 69,15,88,220, //addps %xmm12,%xmm11
7047 69,15,89,209, //mulps %xmm9,%xmm10
7048 69,15,88,211, //addps %xmm11,%xmm10
7049 65,15,89,200, //mulps %xmm8,%xmm1
7050 65,15,88,202, //addps %xmm10,%xmm1
7051 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7052 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7053 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
7054 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7055 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
7056 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7057 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
7058 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7059 68,15,89,226, //mulps %xmm2,%xmm12
7060 69,15,88,229, //addps %xmm13,%xmm12
7061 69,15,89,217, //mulps %xmm9,%xmm11
7062 69,15,88,220, //addps %xmm12,%xmm11
7063 69,15,89,208, //mulps %xmm8,%xmm10
7064 69,15,88,211, //addps %xmm11,%xmm10
7065 72,173, //lods %ds:(%rsi),%rax
7066 65,15,40,210, //movaps %xmm10,%xmm2
7067 255,224, //jmpq *%rax
7068};
7069
Mike Kleine9ed07d2017-03-07 12:28:11 -05007070CODE const uint8_t sk_matrix_4x5_sse2[] = {
7071 68,15,40,201, //movaps %xmm1,%xmm9
7072 68,15,40,192, //movaps %xmm0,%xmm8
7073 72,173, //lods %ds:(%rsi),%rax
7074 243,15,16,0, //movss (%rax),%xmm0
7075 243,15,16,72,4, //movss 0x4(%rax),%xmm1
7076 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7077 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
7078 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7079 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
7080 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7081 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
7082 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7083 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
7084 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7085 68,15,89,227, //mulps %xmm3,%xmm12
7086 69,15,88,229, //addps %xmm13,%xmm12
7087 68,15,89,218, //mulps %xmm2,%xmm11
7088 69,15,88,220, //addps %xmm12,%xmm11
7089 69,15,89,209, //mulps %xmm9,%xmm10
7090 69,15,88,211, //addps %xmm11,%xmm10
7091 65,15,89,192, //mulps %xmm8,%xmm0
7092 65,15,88,194, //addps %xmm10,%xmm0
7093 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7094 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
7095 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7096 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
7097 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7098 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
7099 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7100 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
7101 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7102 68,15,89,227, //mulps %xmm3,%xmm12
7103 69,15,88,229, //addps %xmm13,%xmm12
7104 68,15,89,218, //mulps %xmm2,%xmm11
7105 69,15,88,220, //addps %xmm12,%xmm11
7106 69,15,89,209, //mulps %xmm9,%xmm10
7107 69,15,88,211, //addps %xmm11,%xmm10
7108 65,15,89,200, //mulps %xmm8,%xmm1
7109 65,15,88,202, //addps %xmm10,%xmm1
7110 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7111 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7112 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
7113 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7114 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
7115 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7116 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
7117 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7118 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
7119 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
7120 68,15,89,235, //mulps %xmm3,%xmm13
7121 69,15,88,238, //addps %xmm14,%xmm13
7122 68,15,89,226, //mulps %xmm2,%xmm12
7123 69,15,88,229, //addps %xmm13,%xmm12
7124 69,15,89,217, //mulps %xmm9,%xmm11
7125 69,15,88,220, //addps %xmm12,%xmm11
7126 69,15,89,208, //mulps %xmm8,%xmm10
7127 69,15,88,211, //addps %xmm11,%xmm10
7128 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
7129 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7130 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
7131 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7132 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
7133 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
7134 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
7135 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
7136 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
7137 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
7138 68,15,89,243, //mulps %xmm3,%xmm14
7139 69,15,88,247, //addps %xmm15,%xmm14
7140 68,15,89,234, //mulps %xmm2,%xmm13
7141 69,15,88,238, //addps %xmm14,%xmm13
7142 69,15,89,225, //mulps %xmm9,%xmm12
7143 69,15,88,229, //addps %xmm13,%xmm12
7144 69,15,89,216, //mulps %xmm8,%xmm11
7145 69,15,88,220, //addps %xmm12,%xmm11
7146 72,173, //lods %ds:(%rsi),%rax
7147 65,15,40,210, //movaps %xmm10,%xmm2
7148 65,15,40,219, //movaps %xmm11,%xmm3
7149 255,224, //jmpq *%rax
7150};
7151
Mike Klein894d5612017-03-07 07:59:52 -05007152CODE const uint8_t sk_matrix_perspective_sse2[] = {
7153 68,15,40,192, //movaps %xmm0,%xmm8
7154 72,173, //lods %ds:(%rsi),%rax
7155 243,15,16,0, //movss (%rax),%xmm0
7156 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
7157 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
7158 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7159 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
7160 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7161 68,15,89,201, //mulps %xmm1,%xmm9
7162 69,15,88,202, //addps %xmm10,%xmm9
7163 65,15,89,192, //mulps %xmm8,%xmm0
7164 65,15,88,193, //addps %xmm9,%xmm0
7165 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
7166 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
7167 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
7168 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7169 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
7170 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7171 68,15,89,209, //mulps %xmm1,%xmm10
7172 69,15,88,211, //addps %xmm11,%xmm10
7173 69,15,89,200, //mulps %xmm8,%xmm9
7174 69,15,88,202, //addps %xmm10,%xmm9
7175 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
7176 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
7177 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
7178 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
7179 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
7180 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
7181 68,15,89,217, //mulps %xmm1,%xmm11
7182 69,15,88,220, //addps %xmm12,%xmm11
7183 69,15,89,208, //mulps %xmm8,%xmm10
7184 69,15,88,211, //addps %xmm11,%xmm10
7185 65,15,83,202, //rcpps %xmm10,%xmm1
7186 15,89,193, //mulps %xmm1,%xmm0
7187 68,15,89,201, //mulps %xmm1,%xmm9
7188 72,173, //lods %ds:(%rsi),%rax
7189 65,15,40,201, //movaps %xmm9,%xmm1
7190 255,224, //jmpq *%rax
7191};
7192
7193CODE const uint8_t sk_linear_gradient_2stops_sse2[] = {
7194 72,173, //lods %ds:(%rsi),%rax
7195 68,15,16,8, //movups (%rax),%xmm9
7196 15,16,88,16, //movups 0x10(%rax),%xmm3
7197 68,15,40,195, //movaps %xmm3,%xmm8
7198 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
7199 65,15,40,201, //movaps %xmm9,%xmm1
7200 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
7201 68,15,89,192, //mulps %xmm0,%xmm8
7202 68,15,88,193, //addps %xmm1,%xmm8
7203 15,40,203, //movaps %xmm3,%xmm1
7204 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
7205 65,15,40,209, //movaps %xmm9,%xmm2
7206 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
7207 15,89,200, //mulps %xmm0,%xmm1
7208 15,88,202, //addps %xmm2,%xmm1
7209 15,40,211, //movaps %xmm3,%xmm2
7210 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
7211 69,15,40,209, //movaps %xmm9,%xmm10
7212 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
7213 15,89,208, //mulps %xmm0,%xmm2
7214 65,15,88,210, //addps %xmm10,%xmm2
7215 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
7216 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
7217 15,89,216, //mulps %xmm0,%xmm3
7218 65,15,88,217, //addps %xmm9,%xmm3
7219 72,173, //lods %ds:(%rsi),%rax
7220 65,15,40,192, //movaps %xmm8,%xmm0
7221 255,224, //jmpq *%rax
7222};
7223#elif defined(_M_X64)
7224
7225CODE const uint8_t sk_start_pipeline_hsw[] = {
7226 65,87, //push %r15
7227 65,86, //push %r14
7228 65,85, //push %r13
7229 65,84, //push %r12
7230 86, //push %rsi
7231 87, //push %rdi
7232 83, //push %rbx
7233 72,129,236,160,0,0,0, //sub $0xa0,%rsp
7234 197,120,41,188,36,144,0,0,0, //vmovaps %xmm15,0x90(%rsp)
7235 197,120,41,180,36,128,0,0,0, //vmovaps %xmm14,0x80(%rsp)
7236 197,120,41,108,36,112, //vmovaps %xmm13,0x70(%rsp)
7237 197,120,41,100,36,96, //vmovaps %xmm12,0x60(%rsp)
7238 197,120,41,92,36,80, //vmovaps %xmm11,0x50(%rsp)
7239 197,120,41,84,36,64, //vmovaps %xmm10,0x40(%rsp)
7240 197,120,41,76,36,48, //vmovaps %xmm9,0x30(%rsp)
7241 197,120,41,68,36,32, //vmovaps %xmm8,0x20(%rsp)
7242 197,248,41,124,36,16, //vmovaps %xmm7,0x10(%rsp)
7243 197,248,41,52,36, //vmovaps %xmm6,(%rsp)
7244 77,137,205, //mov %r9,%r13
7245 77,137,198, //mov %r8,%r14
7246 72,137,203, //mov %rcx,%rbx
7247 72,137,214, //mov %rdx,%rsi
7248 72,173, //lods %ds:(%rsi),%rax
7249 73,137,199, //mov %rax,%r15
7250 73,137,244, //mov %rsi,%r12
7251 72,141,67,8, //lea 0x8(%rbx),%rax
7252 76,57,232, //cmp %r13,%rax
7253 118,5, //jbe 75 <_sk_start_pipeline_hsw+0x75>
7254 72,137,223, //mov %rbx,%rdi
7255 235,65, //jmp b6 <_sk_start_pipeline_hsw+0xb6>
7256 185,0,0,0,0, //mov $0x0,%ecx
7257 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7258 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7259 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7260 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7261 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
7262 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
7263 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
7264 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
7265 72,137,223, //mov %rbx,%rdi
7266 76,137,230, //mov %r12,%rsi
7267 76,137,242, //mov %r14,%rdx
7268 65,255,215, //callq *%r15
7269 72,141,123,8, //lea 0x8(%rbx),%rdi
7270 72,131,195,16, //add $0x10,%rbx
7271 76,57,235, //cmp %r13,%rbx
7272 72,137,251, //mov %rdi,%rbx
7273 118,191, //jbe 75 <_sk_start_pipeline_hsw+0x75>
7274 76,137,233, //mov %r13,%rcx
7275 72,41,249, //sub %rdi,%rcx
7276 116,41, //je e7 <_sk_start_pipeline_hsw+0xe7>
7277 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7278 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7279 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7280 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7281 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
7282 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
7283 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
7284 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
7285 76,137,230, //mov %r12,%rsi
7286 76,137,242, //mov %r14,%rdx
7287 65,255,215, //callq *%r15
7288 76,137,232, //mov %r13,%rax
7289 197,248,40,52,36, //vmovaps (%rsp),%xmm6
7290 197,248,40,124,36,16, //vmovaps 0x10(%rsp),%xmm7
7291 197,120,40,68,36,32, //vmovaps 0x20(%rsp),%xmm8
7292 197,120,40,76,36,48, //vmovaps 0x30(%rsp),%xmm9
7293 197,120,40,84,36,64, //vmovaps 0x40(%rsp),%xmm10
7294 197,120,40,92,36,80, //vmovaps 0x50(%rsp),%xmm11
7295 197,120,40,100,36,96, //vmovaps 0x60(%rsp),%xmm12
7296 197,120,40,108,36,112, //vmovaps 0x70(%rsp),%xmm13
7297 197,120,40,180,36,128,0,0,0, //vmovaps 0x80(%rsp),%xmm14
7298 197,120,40,188,36,144,0,0,0, //vmovaps 0x90(%rsp),%xmm15
7299 72,129,196,160,0,0,0, //add $0xa0,%rsp
7300 91, //pop %rbx
7301 95, //pop %rdi
7302 94, //pop %rsi
7303 65,92, //pop %r12
7304 65,93, //pop %r13
7305 65,94, //pop %r14
7306 65,95, //pop %r15
7307 197,248,119, //vzeroupper
7308 195, //retq
7309};
7310
7311CODE const uint8_t sk_just_return_hsw[] = {
7312 195, //retq
7313};
7314
7315CODE const uint8_t sk_seed_shader_hsw[] = {
7316 72,173, //lods %ds:(%rsi),%rax
7317 197,249,110,199, //vmovd %edi,%xmm0
7318 196,226,125,24,192, //vbroadcastss %xmm0,%ymm0
7319 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007320 65,184,0,0,0,63, //mov $0x3f000000,%r8d
7321 196,193,121,110,200, //vmovd %r8d,%xmm1
7322 196,226,125,24,201, //vbroadcastss %xmm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05007323 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
7324 197,252,88,66,20, //vaddps 0x14(%rdx),%ymm0,%ymm0
7325 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
7326 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
7327 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007328 184,0,0,128,63, //mov $0x3f800000,%eax
7329 197,249,110,208, //vmovd %eax,%xmm2
7330 196,226,125,24,210, //vbroadcastss %xmm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05007331 72,173, //lods %ds:(%rsi),%rax
7332 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7333 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
7334 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
7335 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
7336 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
7337 255,224, //jmpq *%rax
7338};
7339
7340CODE const uint8_t sk_constant_color_hsw[] = {
7341 72,173, //lods %ds:(%rsi),%rax
7342 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
7343 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
7344 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
7345 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
7346 72,173, //lods %ds:(%rsi),%rax
7347 255,224, //jmpq *%rax
7348};
7349
7350CODE const uint8_t sk_clear_hsw[] = {
7351 72,173, //lods %ds:(%rsi),%rax
7352 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7353 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7354 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7355 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
7356 255,224, //jmpq *%rax
7357};
7358
7359CODE const uint8_t sk_plus__hsw[] = {
7360 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
7361 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
7362 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
7363 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
7364 72,173, //lods %ds:(%rsi),%rax
7365 255,224, //jmpq *%rax
7366};
7367
7368CODE const uint8_t sk_srcover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007369 184,0,0,128,63, //mov $0x3f800000,%eax
7370 197,121,110,192, //vmovd %eax,%xmm8
7371 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007372 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
7373 196,194,93,184,192, //vfmadd231ps %ymm8,%ymm4,%ymm0
7374 196,194,85,184,200, //vfmadd231ps %ymm8,%ymm5,%ymm1
7375 196,194,77,184,208, //vfmadd231ps %ymm8,%ymm6,%ymm2
7376 196,194,69,184,216, //vfmadd231ps %ymm8,%ymm7,%ymm3
7377 72,173, //lods %ds:(%rsi),%rax
7378 255,224, //jmpq *%rax
7379};
7380
7381CODE const uint8_t sk_dstover_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007382 184,0,0,128,63, //mov $0x3f800000,%eax
7383 197,121,110,192, //vmovd %eax,%xmm8
7384 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007385 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
7386 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7387 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
7388 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
7389 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
7390 72,173, //lods %ds:(%rsi),%rax
7391 255,224, //jmpq *%rax
7392};
7393
7394CODE const uint8_t sk_clamp_0_hsw[] = {
7395 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
7396 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
7397 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
7398 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
7399 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
7400 72,173, //lods %ds:(%rsi),%rax
7401 255,224, //jmpq *%rax
7402};
7403
7404CODE const uint8_t sk_clamp_1_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007405 184,0,0,128,63, //mov $0x3f800000,%eax
7406 197,121,110,192, //vmovd %eax,%xmm8
7407 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007408 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
7409 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
7410 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
7411 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
7412 72,173, //lods %ds:(%rsi),%rax
7413 255,224, //jmpq *%rax
7414};
7415
7416CODE const uint8_t sk_clamp_a_hsw[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007417 184,0,0,128,63, //mov $0x3f800000,%eax
7418 197,121,110,192, //vmovd %eax,%xmm8
7419 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05007420 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
7421 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
7422 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
7423 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
7424 72,173, //lods %ds:(%rsi),%rax
7425 255,224, //jmpq *%rax
7426};
7427
7428CODE const uint8_t sk_set_rgb_hsw[] = {
7429 72,173, //lods %ds:(%rsi),%rax
7430 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
7431 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
7432 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
7433 72,173, //lods %ds:(%rsi),%rax
7434 255,224, //jmpq *%rax
7435};
7436
7437CODE const uint8_t sk_swap_rb_hsw[] = {
7438 197,124,40,192, //vmovaps %ymm0,%ymm8
7439 72,173, //lods %ds:(%rsi),%rax
7440 197,252,40,194, //vmovaps %ymm2,%ymm0
7441 197,124,41,194, //vmovaps %ymm8,%ymm2
7442 255,224, //jmpq *%rax
7443};
7444
7445CODE const uint8_t sk_swap_hsw[] = {
7446 197,124,40,195, //vmovaps %ymm3,%ymm8
7447 197,124,40,202, //vmovaps %ymm2,%ymm9
7448 197,124,40,209, //vmovaps %ymm1,%ymm10
7449 197,124,40,216, //vmovaps %ymm0,%ymm11
7450 72,173, //lods %ds:(%rsi),%rax
7451 197,252,40,196, //vmovaps %ymm4,%ymm0
7452 197,252,40,205, //vmovaps %ymm5,%ymm1
7453 197,252,40,214, //vmovaps %ymm6,%ymm2
7454 197,252,40,223, //vmovaps %ymm7,%ymm3
7455 197,124,41,220, //vmovaps %ymm11,%ymm4
7456 197,124,41,213, //vmovaps %ymm10,%ymm5
7457 197,124,41,206, //vmovaps %ymm9,%ymm6
7458 197,124,41,199, //vmovaps %ymm8,%ymm7
7459 255,224, //jmpq *%rax
7460};
7461
7462CODE const uint8_t sk_move_src_dst_hsw[] = {
7463 72,173, //lods %ds:(%rsi),%rax
7464 197,252,40,224, //vmovaps %ymm0,%ymm4
7465 197,252,40,233, //vmovaps %ymm1,%ymm5
7466 197,252,40,242, //vmovaps %ymm2,%ymm6
7467 197,252,40,251, //vmovaps %ymm3,%ymm7
7468 255,224, //jmpq *%rax
7469};
7470
7471CODE const uint8_t sk_move_dst_src_hsw[] = {
7472 72,173, //lods %ds:(%rsi),%rax
7473 197,252,40,196, //vmovaps %ymm4,%ymm0
7474 197,252,40,205, //vmovaps %ymm5,%ymm1
7475 197,252,40,214, //vmovaps %ymm6,%ymm2
7476 197,252,40,223, //vmovaps %ymm7,%ymm3
7477 255,224, //jmpq *%rax
7478};
7479
7480CODE const uint8_t sk_premul_hsw[] = {
7481 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
7482 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
7483 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
7484 72,173, //lods %ds:(%rsi),%rax
7485 255,224, //jmpq *%rax
7486};
7487
7488CODE const uint8_t sk_unpremul_hsw[] = {
7489 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
7490 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007491 184,0,0,128,63, //mov $0x3f800000,%eax
7492 197,121,110,208, //vmovd %eax,%xmm10
7493 196,66,125,24,210, //vbroadcastss %xmm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05007494 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
7495 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
7496 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
7497 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
7498 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
7499 72,173, //lods %ds:(%rsi),%rax
7500 255,224, //jmpq *%rax
7501};
7502
7503CODE const uint8_t sk_from_srgb_hsw[] = {
7504 196,98,125,24,66,64, //vbroadcastss 0x40(%rdx),%ymm8
7505 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
7506 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
7507 196,98,125,24,90,60, //vbroadcastss 0x3c(%rdx),%ymm11
7508 196,98,125,24,98,56, //vbroadcastss 0x38(%rdx),%ymm12
7509 196,65,124,40,235, //vmovaps %ymm11,%ymm13
7510 196,66,125,168,236, //vfmadd213ps %ymm12,%ymm0,%ymm13
7511 196,98,125,24,114,52, //vbroadcastss 0x34(%rdx),%ymm14
7512 196,66,45,168,238, //vfmadd213ps %ymm14,%ymm10,%ymm13
7513 196,98,125,24,82,68, //vbroadcastss 0x44(%rdx),%ymm10
7514 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
7515 196,195,21,74,193,0, //vblendvps %ymm0,%ymm9,%ymm13,%ymm0
7516 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
7517 197,116,89,233, //vmulps %ymm1,%ymm1,%ymm13
7518 196,65,124,40,251, //vmovaps %ymm11,%ymm15
7519 196,66,117,168,252, //vfmadd213ps %ymm12,%ymm1,%ymm15
7520 196,66,21,168,254, //vfmadd213ps %ymm14,%ymm13,%ymm15
7521 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
7522 196,195,5,74,201,16, //vblendvps %ymm1,%ymm9,%ymm15,%ymm1
7523 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
7524 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
7525 196,66,109,168,220, //vfmadd213ps %ymm12,%ymm2,%ymm11
7526 196,66,53,168,222, //vfmadd213ps %ymm14,%ymm9,%ymm11
7527 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
7528 196,195,37,74,208,32, //vblendvps %ymm2,%ymm8,%ymm11,%ymm2
7529 72,173, //lods %ds:(%rsi),%rax
7530 255,224, //jmpq *%rax
7531};
7532
7533CODE const uint8_t sk_to_srgb_hsw[] = {
7534 197,124,82,192, //vrsqrtps %ymm0,%ymm8
7535 196,65,124,83,200, //vrcpps %ymm8,%ymm9
7536 196,65,124,82,208, //vrsqrtps %ymm8,%ymm10
7537 196,98,125,24,66,72, //vbroadcastss 0x48(%rdx),%ymm8
7538 197,60,89,216, //vmulps %ymm0,%ymm8,%ymm11
7539 196,98,125,24,34, //vbroadcastss (%rdx),%ymm12
7540 196,98,125,24,106,76, //vbroadcastss 0x4c(%rdx),%ymm13
7541 196,98,125,24,114,80, //vbroadcastss 0x50(%rdx),%ymm14
7542 196,98,125,24,122,84, //vbroadcastss 0x54(%rdx),%ymm15
7543 196,66,13,168,207, //vfmadd213ps %ymm15,%ymm14,%ymm9
7544 196,66,21,184,202, //vfmadd231ps %ymm10,%ymm13,%ymm9
7545 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
7546 196,98,125,24,82,88, //vbroadcastss 0x58(%rdx),%ymm10
7547 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
7548 196,195,53,74,195,0, //vblendvps %ymm0,%ymm11,%ymm9,%ymm0
7549 197,124,82,201, //vrsqrtps %ymm1,%ymm9
7550 196,65,124,83,217, //vrcpps %ymm9,%ymm11
7551 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
7552 196,66,13,168,223, //vfmadd213ps %ymm15,%ymm14,%ymm11
7553 196,66,21,184,217, //vfmadd231ps %ymm9,%ymm13,%ymm11
7554 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
7555 196,65,28,93,219, //vminps %ymm11,%ymm12,%ymm11
7556 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
7557 196,195,37,74,201,16, //vblendvps %ymm1,%ymm9,%ymm11,%ymm1
7558 197,124,82,202, //vrsqrtps %ymm2,%ymm9
7559 196,65,124,83,217, //vrcpps %ymm9,%ymm11
7560 196,66,13,168,223, //vfmadd213ps %ymm15,%ymm14,%ymm11
7561 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
7562 196,66,21,184,217, //vfmadd231ps %ymm9,%ymm13,%ymm11
7563 196,65,28,93,203, //vminps %ymm11,%ymm12,%ymm9
7564 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
7565 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
7566 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
7567 72,173, //lods %ds:(%rsi),%rax
7568 255,224, //jmpq *%rax
7569};
7570
7571CODE const uint8_t sk_scale_1_float_hsw[] = {
7572 72,173, //lods %ds:(%rsi),%rax
7573 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
7574 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
7575 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
7576 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
7577 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
7578 72,173, //lods %ds:(%rsi),%rax
7579 255,224, //jmpq *%rax
7580};
7581
7582CODE const uint8_t sk_scale_u8_hsw[] = {
7583 73,137,200, //mov %rcx,%r8
7584 72,173, //lods %ds:(%rsi),%rax
7585 72,139,0, //mov (%rax),%rax
7586 72,1,248, //add %rdi,%rax
7587 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007588 117,56, //jne 4f9 <_sk_scale_u8_hsw+0x48>
Mike Klein894d5612017-03-07 07:59:52 -05007589 197,123,16,0, //vmovsd (%rax),%xmm8
7590 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
7591 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007592 184,129,128,128,59, //mov $0x3b808081,%eax
7593 197,121,110,200, //vmovd %eax,%xmm9
7594 196,66,125,24,201, //vbroadcastss %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05007595 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
7596 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
7597 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
7598 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
7599 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
7600 72,173, //lods %ds:(%rsi),%rax
7601 76,137,193, //mov %r8,%rcx
7602 255,224, //jmpq *%rax
7603 49,201, //xor %ecx,%ecx
7604 77,137,194, //mov %r8,%r10
7605 69,49,201, //xor %r9d,%r9d
7606 68,15,182,24, //movzbl (%rax),%r11d
7607 72,255,192, //inc %rax
7608 73,211,227, //shl %cl,%r11
7609 77,9,217, //or %r11,%r9
7610 72,131,193,8, //add $0x8,%rcx
7611 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007612 117,234, //jne 501 <_sk_scale_u8_hsw+0x50>
Mike Klein894d5612017-03-07 07:59:52 -05007613 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007614 235,167, //jmp 4c5 <_sk_scale_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05007615};
7616
7617CODE const uint8_t sk_lerp_1_float_hsw[] = {
7618 72,173, //lods %ds:(%rsi),%rax
7619 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
7620 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
7621 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7622 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
7623 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
7624 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
7625 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
7626 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
7627 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
7628 72,173, //lods %ds:(%rsi),%rax
7629 255,224, //jmpq *%rax
7630};
7631
7632CODE const uint8_t sk_lerp_u8_hsw[] = {
7633 73,137,200, //mov %rcx,%r8
7634 72,173, //lods %ds:(%rsi),%rax
7635 72,139,0, //mov (%rax),%rax
7636 72,1,248, //add %rdi,%rax
7637 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007638 117,76, //jne 5a9 <_sk_lerp_u8_hsw+0x5c>
Mike Klein894d5612017-03-07 07:59:52 -05007639 197,123,16,0, //vmovsd (%rax),%xmm8
7640 196,66,125,49,192, //vpmovzxbd %xmm8,%ymm8
7641 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007642 184,129,128,128,59, //mov $0x3b808081,%eax
7643 197,121,110,200, //vmovd %eax,%xmm9
7644 196,66,125,24,201, //vbroadcastss %xmm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05007645 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
7646 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
7647 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7648 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
7649 196,226,61,168,205, //vfmadd213ps %ymm5,%ymm8,%ymm1
7650 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
7651 196,226,61,168,214, //vfmadd213ps %ymm6,%ymm8,%ymm2
7652 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
7653 196,226,61,168,223, //vfmadd213ps %ymm7,%ymm8,%ymm3
7654 72,173, //lods %ds:(%rsi),%rax
7655 76,137,193, //mov %r8,%rcx
7656 255,224, //jmpq *%rax
7657 49,201, //xor %ecx,%ecx
7658 77,137,194, //mov %r8,%r10
7659 69,49,201, //xor %r9d,%r9d
7660 68,15,182,24, //movzbl (%rax),%r11d
7661 72,255,192, //inc %rax
7662 73,211,227, //shl %cl,%r11
7663 77,9,217, //or %r11,%r9
7664 72,131,193,8, //add $0x8,%rcx
7665 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007666 117,234, //jne 5b1 <_sk_lerp_u8_hsw+0x64>
Mike Klein894d5612017-03-07 07:59:52 -05007667 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007668 235,147, //jmp 561 <_sk_lerp_u8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05007669};
7670
7671CODE const uint8_t sk_lerp_565_hsw[] = {
7672 72,173, //lods %ds:(%rsi),%rax
7673 76,139,16, //mov (%rax),%r10
7674 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007675 15,133,132,0,0,0, //jne 660 <_sk_lerp_565_hsw+0x92>
Mike Klein894d5612017-03-07 07:59:52 -05007676 196,193,122,111,28,122, //vmovdqu (%r10,%rdi,2),%xmm3
7677 196,226,125,51,219, //vpmovzxwd %xmm3,%ymm3
7678 196,98,125,88,66,104, //vpbroadcastd 0x68(%rdx),%ymm8
7679 197,61,219,195, //vpand %ymm3,%ymm8,%ymm8
7680 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
7681 196,98,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm9
7682 196,65,52,89,192, //vmulps %ymm8,%ymm9,%ymm8
7683 196,98,125,88,74,108, //vpbroadcastd 0x6c(%rdx),%ymm9
7684 197,53,219,203, //vpand %ymm3,%ymm9,%ymm9
7685 196,65,124,91,201, //vcvtdq2ps %ymm9,%ymm9
7686 196,98,125,24,82,120, //vbroadcastss 0x78(%rdx),%ymm10
7687 196,65,44,89,201, //vmulps %ymm9,%ymm10,%ymm9
7688 196,98,125,88,82,112, //vpbroadcastd 0x70(%rdx),%ymm10
7689 197,173,219,219, //vpand %ymm3,%ymm10,%ymm3
7690 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
7691 196,98,125,24,82,124, //vbroadcastss 0x7c(%rdx),%ymm10
7692 197,172,89,219, //vmulps %ymm3,%ymm10,%ymm3
7693 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
7694 196,226,61,168,196, //vfmadd213ps %ymm4,%ymm8,%ymm0
7695 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
7696 196,226,53,168,205, //vfmadd213ps %ymm5,%ymm9,%ymm1
7697 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
7698 196,226,101,168,214, //vfmadd213ps %ymm6,%ymm3,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007699 184,0,0,128,63, //mov $0x3f800000,%eax
7700 197,249,110,216, //vmovd %eax,%xmm3
7701 196,226,125,24,219, //vbroadcastss %xmm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05007702 72,173, //lods %ds:(%rsi),%rax
7703 255,224, //jmpq *%rax
7704 65,137,200, //mov %ecx,%r8d
7705 65,128,224,7, //and $0x7,%r8b
7706 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
7707 65,254,200, //dec %r8b
7708 69,15,182,192, //movzbl %r8b,%r8d
7709 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007710 15,135,102,255,255,255, //ja 5e2 <_sk_lerp_565_hsw+0x14>
7711 76,141,13,73,0,0,0, //lea 0x49(%rip),%r9 # 6cc <_sk_lerp_565_hsw+0xfe>
Mike Klein894d5612017-03-07 07:59:52 -05007712 75,99,4,129, //movslq (%r9,%r8,4),%rax
7713 76,1,200, //add %r9,%rax
7714 255,224, //jmpq *%rax
7715 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
7716 196,193,97,196,92,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm3
7717 196,193,97,196,92,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm3,%xmm3
7718 196,193,97,196,92,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm3,%xmm3
7719 196,193,97,196,92,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm3,%xmm3
7720 196,193,97,196,92,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
7721 196,193,97,196,92,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
7722 196,193,97,196,28,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007723 233,22,255,255,255, //jmpq 5e2 <_sk_lerp_565_hsw+0x14>
7724 244, //hlt
Mike Klein894d5612017-03-07 07:59:52 -05007725 255, //(bad)
7726 255, //(bad)
7727 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007728 236, //in (%dx),%al
Mike Klein894d5612017-03-07 07:59:52 -05007729 255, //(bad)
7730 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007731 255,228, //jmpq *%rsp
Mike Klein894d5612017-03-07 07:59:52 -05007732 255, //(bad)
7733 255, //(bad)
7734 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007735 220,255, //fdivr %st,%st(7)
7736 255, //(bad)
7737 255,212, //callq *%rsp
7738 255, //(bad)
7739 255, //(bad)
7740 255,204, //dec %esp
7741 255, //(bad)
7742 255, //(bad)
7743 255,192, //inc %eax
Mike Klein894d5612017-03-07 07:59:52 -05007744 255, //(bad)
7745 255, //(bad)
7746 255, //.byte 0xff
7747};
7748
7749CODE const uint8_t sk_load_tables_hsw[] = {
7750 73,137,200, //mov %rcx,%r8
7751 72,173, //lods %ds:(%rsi),%rax
7752 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
7753 76,3,8, //add (%rax),%r9
7754 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007755 117,106, //jne 767 <_sk_load_tables_hsw+0x7f>
Mike Klein894d5612017-03-07 07:59:52 -05007756 196,193,126,111,25, //vmovdqu (%r9),%ymm3
7757 196,226,125,88,82,16, //vpbroadcastd 0x10(%rdx),%ymm2
7758 197,237,219,203, //vpand %ymm3,%ymm2,%ymm1
7759 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
7760 72,139,72,8, //mov 0x8(%rax),%rcx
7761 76,139,72,16, //mov 0x10(%rax),%r9
7762 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
7763 196,226,53,146,4,137, //vgatherdps %ymm9,(%rcx,%ymm1,4),%ymm0
7764 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
7765 197,109,219,201, //vpand %ymm1,%ymm2,%ymm9
7766 196,65,45,118,210, //vpcmpeqd %ymm10,%ymm10,%ymm10
7767 196,130,45,146,12,137, //vgatherdps %ymm10,(%r9,%ymm9,4),%ymm1
7768 72,139,64,24, //mov 0x18(%rax),%rax
7769 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
7770 196,65,109,219,201, //vpand %ymm9,%ymm2,%ymm9
7771 196,162,61,146,20,136, //vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
7772 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
7773 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
7774 196,98,125,24,66,12, //vbroadcastss 0xc(%rdx),%ymm8
7775 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
7776 72,173, //lods %ds:(%rsi),%rax
7777 76,137,193, //mov %r8,%rcx
7778 255,224, //jmpq *%rax
7779 185,8,0,0,0, //mov $0x8,%ecx
7780 68,41,193, //sub %r8d,%ecx
7781 192,225,3, //shl $0x3,%cl
7782 73,199,194,255,255,255,255, //mov $0xffffffffffffffff,%r10
7783 73,211,234, //shr %cl,%r10
7784 196,193,249,110,194, //vmovq %r10,%xmm0
7785 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
7786 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007787 233,114,255,255,255, //jmpq 702 <_sk_load_tables_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05007788};
7789
7790CODE const uint8_t sk_load_a8_hsw[] = {
7791 73,137,200, //mov %rcx,%r8
7792 72,173, //lods %ds:(%rsi),%rax
7793 72,139,0, //mov (%rax),%rax
7794 72,1,248, //add %rdi,%rax
7795 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007796 117,42, //jne 7ca <_sk_load_a8_hsw+0x3a>
Mike Klein894d5612017-03-07 07:59:52 -05007797 197,251,16,0, //vmovsd (%rax),%xmm0
7798 196,226,125,49,192, //vpmovzxbd %xmm0,%ymm0
7799 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
7800 196,226,125,24,74,12, //vbroadcastss 0xc(%rdx),%ymm1
7801 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
7802 72,173, //lods %ds:(%rsi),%rax
7803 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
7804 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
7805 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
7806 76,137,193, //mov %r8,%rcx
7807 255,224, //jmpq *%rax
7808 49,201, //xor %ecx,%ecx
7809 77,137,194, //mov %r8,%r10
7810 69,49,201, //xor %r9d,%r9d
7811 68,15,182,24, //movzbl (%rax),%r11d
7812 72,255,192, //inc %rax
7813 73,211,227, //shl %cl,%r11
7814 77,9,217, //or %r11,%r9
7815 72,131,193,8, //add $0x8,%rcx
7816 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007817 117,234, //jne 7d2 <_sk_load_a8_hsw+0x42>
Mike Klein894d5612017-03-07 07:59:52 -05007818 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007819 235,181, //jmp 7a4 <_sk_load_a8_hsw+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05007820};
7821
7822CODE const uint8_t sk_store_a8_hsw[] = {
7823 72,173, //lods %ds:(%rsi),%rax
7824 76,139,8, //mov (%rax),%r9
7825 196,98,125,24,66,8, //vbroadcastss 0x8(%rdx),%ymm8
7826 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
7827 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
7828 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
7829 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
7830 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
7831 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007832 117,10, //jne 822 <_sk_store_a8_hsw+0x33>
Mike Klein894d5612017-03-07 07:59:52 -05007833 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
7834 72,173, //lods %ds:(%rsi),%rax
7835 255,224, //jmpq *%rax
7836 137,200, //mov %ecx,%eax
7837 36,7, //and $0x7,%al
7838 254,200, //dec %al
7839 68,15,182,192, //movzbl %al,%r8d
7840 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007841 119,236, //ja 81e <_sk_store_a8_hsw+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05007842 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007843 76,141,21,66,0,0,0, //lea 0x42(%rip),%r10 # 880 <_sk_store_a8_hsw+0x91>
Mike Klein894d5612017-03-07 07:59:52 -05007844 75,99,4,130, //movslq (%r10,%r8,4),%rax
7845 76,1,208, //add %r10,%rax
7846 255,224, //jmpq *%rax
7847 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
7848 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
7849 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
7850 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
7851 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
7852 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
7853 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007854 235,158, //jmp 81e <_sk_store_a8_hsw+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05007855 247,255, //idiv %edi
7856 255, //(bad)
7857 255, //(bad)
7858 239, //out %eax,(%dx)
7859 255, //(bad)
7860 255, //(bad)
7861 255,231, //jmpq *%rdi
7862 255, //(bad)
7863 255, //(bad)
7864 255, //(bad)
7865 223,255, //(bad)
7866 255, //(bad)
7867 255,215, //callq *%rdi
7868 255, //(bad)
7869 255, //(bad)
7870 255,207, //dec %edi
7871 255, //(bad)
7872 255, //(bad)
7873 255,199, //inc %edi
7874 255, //(bad)
7875 255, //(bad)
7876 255, //.byte 0xff
7877};
7878
7879CODE const uint8_t sk_load_565_hsw[] = {
7880 72,173, //lods %ds:(%rsi),%rax
7881 76,139,16, //mov (%rax),%r10
7882 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007883 117,92, //jne 902 <_sk_load_565_hsw+0x66>
Mike Klein894d5612017-03-07 07:59:52 -05007884 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
7885 196,226,125,51,208, //vpmovzxwd %xmm0,%ymm2
7886 196,226,125,88,66,104, //vpbroadcastd 0x68(%rdx),%ymm0
7887 197,253,219,194, //vpand %ymm2,%ymm0,%ymm0
7888 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
7889 196,226,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm1
7890 197,244,89,192, //vmulps %ymm0,%ymm1,%ymm0
7891 196,226,125,88,74,108, //vpbroadcastd 0x6c(%rdx),%ymm1
7892 197,245,219,202, //vpand %ymm2,%ymm1,%ymm1
7893 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
7894 196,226,125,24,90,120, //vbroadcastss 0x78(%rdx),%ymm3
7895 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
7896 196,226,125,88,90,112, //vpbroadcastd 0x70(%rdx),%ymm3
7897 197,229,219,210, //vpand %ymm2,%ymm3,%ymm2
7898 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
7899 196,226,125,24,90,124, //vbroadcastss 0x7c(%rdx),%ymm3
7900 197,228,89,210, //vmulps %ymm2,%ymm3,%ymm2
7901 196,226,125,24,26, //vbroadcastss (%rdx),%ymm3
7902 72,173, //lods %ds:(%rsi),%rax
7903 255,224, //jmpq *%rax
7904 65,137,200, //mov %ecx,%r8d
7905 65,128,224,7, //and $0x7,%r8b
7906 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
7907 65,254,200, //dec %r8b
7908 69,15,182,192, //movzbl %r8b,%r8d
7909 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007910 119,146, //ja 8ac <_sk_load_565_hsw+0x10>
7911 76,141,13,75,0,0,0, //lea 0x4b(%rip),%r9 # 96c <_sk_load_565_hsw+0xd0>
Mike Klein894d5612017-03-07 07:59:52 -05007912 75,99,4,129, //movslq (%r9,%r8,4),%rax
7913 76,1,200, //add %r9,%rax
7914 255,224, //jmpq *%rax
7915 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
7916 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
7917 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
7918 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
7919 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
7920 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
7921 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
7922 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007923 233,66,255,255,255, //jmpq 8ac <_sk_load_565_hsw+0x10>
Mike Klein894d5612017-03-07 07:59:52 -05007924 102,144, //xchg %ax,%ax
7925 242,255, //repnz (bad)
7926 255, //(bad)
7927 255, //(bad)
7928 234, //(bad)
7929 255, //(bad)
7930 255, //(bad)
7931 255,226, //jmpq *%rdx
7932 255, //(bad)
7933 255, //(bad)
7934 255, //(bad)
7935 218,255, //(bad)
7936 255, //(bad)
7937 255,210, //callq *%rdx
7938 255, //(bad)
7939 255, //(bad)
7940 255,202, //dec %edx
7941 255, //(bad)
7942 255, //(bad)
7943 255, //(bad)
7944 190, //.byte 0xbe
7945 255, //(bad)
7946 255, //(bad)
7947 255, //.byte 0xff
7948};
7949
7950CODE const uint8_t sk_store_565_hsw[] = {
7951 72,173, //lods %ds:(%rsi),%rax
7952 76,139,8, //mov (%rax),%r9
7953 196,98,125,24,130,128,0,0,0, //vbroadcastss 0x80(%rdx),%ymm8
7954 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
7955 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
7956 196,193,53,114,241,11, //vpslld $0xb,%ymm9,%ymm9
7957 196,98,125,24,146,132,0,0,0, //vbroadcastss 0x84(%rdx),%ymm10
7958 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
7959 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
7960 196,193,45,114,242,5, //vpslld $0x5,%ymm10,%ymm10
7961 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
7962 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
7963 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
7964 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
7965 196,67,125,57,193,1, //vextracti128 $0x1,%ymm8,%xmm9
7966 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
7967 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007968 117,10, //jne 9ea <_sk_store_565_hsw+0x62>
Mike Klein894d5612017-03-07 07:59:52 -05007969 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
7970 72,173, //lods %ds:(%rsi),%rax
7971 255,224, //jmpq *%rax
7972 137,200, //mov %ecx,%eax
7973 36,7, //and $0x7,%al
7974 254,200, //dec %al
7975 68,15,182,192, //movzbl %al,%r8d
7976 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007977 119,236, //ja 9e6 <_sk_store_565_hsw+0x5e>
7978 76,141,21,71,0,0,0, //lea 0x47(%rip),%r10 # a48 <_sk_store_565_hsw+0xc0>
Mike Klein894d5612017-03-07 07:59:52 -05007979 75,99,4,130, //movslq (%r10,%r8,4),%rax
7980 76,1,208, //add %r10,%rax
7981 255,224, //jmpq *%rax
7982 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
7983 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
7984 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
7985 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
7986 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
7987 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
7988 197,121,126,192, //vmovd %xmm8,%eax
7989 102,65,137,4,121, //mov %ax,(%r9,%rdi,2)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05007990 235,161, //jmp 9e6 <_sk_store_565_hsw+0x5e>
Mike Klein894d5612017-03-07 07:59:52 -05007991 15,31,0, //nopl (%rax)
7992 242,255, //repnz (bad)
7993 255, //(bad)
7994 255, //(bad)
7995 234, //(bad)
7996 255, //(bad)
7997 255, //(bad)
7998 255,226, //jmpq *%rdx
7999 255, //(bad)
8000 255, //(bad)
8001 255, //(bad)
8002 218,255, //(bad)
8003 255, //(bad)
8004 255,210, //callq *%rdx
8005 255, //(bad)
8006 255, //(bad)
8007 255,202, //dec %edx
8008 255, //(bad)
8009 255, //(bad)
8010 255,194, //inc %edx
8011 255, //(bad)
8012 255, //(bad)
8013 255, //.byte 0xff
8014};
8015
8016CODE const uint8_t sk_load_8888_hsw[] = {
8017 73,137,200, //mov %rcx,%r8
8018 72,173, //lods %ds:(%rsi),%rax
8019 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
8020 76,3,8, //add (%rax),%r9
8021 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008022 117,104, //jne ae1 <_sk_load_8888_hsw+0x7d>
Mike Klein894d5612017-03-07 07:59:52 -05008023 196,193,126,111,25, //vmovdqu (%r9),%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008024 184,255,0,0,0, //mov $0xff,%eax
8025 197,249,110,192, //vmovd %eax,%xmm0
8026 196,226,125,88,208, //vpbroadcastd %xmm0,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008027 197,237,219,195, //vpand %ymm3,%ymm2,%ymm0
8028 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008029 184,129,128,128,59, //mov $0x3b808081,%eax
8030 197,249,110,200, //vmovd %eax,%xmm1
8031 196,98,125,24,193, //vbroadcastss %xmm1,%ymm8
8032 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05008033 197,245,114,211,8, //vpsrld $0x8,%ymm3,%ymm1
8034 197,237,219,201, //vpand %ymm1,%ymm2,%ymm1
8035 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008036 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008037 197,181,114,211,16, //vpsrld $0x10,%ymm3,%ymm9
8038 196,193,109,219,209, //vpand %ymm9,%ymm2,%ymm2
8039 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008040 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008041 197,229,114,211,24, //vpsrld $0x18,%ymm3,%ymm3
8042 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
8043 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
8044 72,173, //lods %ds:(%rsi),%rax
8045 76,137,193, //mov %r8,%rcx
8046 255,224, //jmpq *%rax
8047 185,8,0,0,0, //mov $0x8,%ecx
8048 68,41,193, //sub %r8d,%ecx
8049 192,225,3, //shl $0x3,%cl
8050 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
8051 72,211,232, //shr %cl,%rax
8052 196,225,249,110,192, //vmovq %rax,%xmm0
8053 196,226,125,33,192, //vpmovsxbd %xmm0,%ymm0
8054 196,194,125,140,25, //vpmaskmovd (%r9),%ymm0,%ymm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008055 233,116,255,255,255, //jmpq a7e <_sk_load_8888_hsw+0x1a>
Mike Klein894d5612017-03-07 07:59:52 -05008056};
8057
8058CODE const uint8_t sk_store_8888_hsw[] = {
8059 73,137,200, //mov %rcx,%r8
8060 72,173, //lods %ds:(%rsi),%rax
8061 76,141,12,189,0,0,0,0, //lea 0x0(,%rdi,4),%r9
8062 76,3,8, //add (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008063 184,0,0,127,67, //mov $0x437f0000,%eax
8064 197,121,110,192, //vmovd %eax,%xmm8
8065 196,66,125,24,192, //vbroadcastss %xmm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008066 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
8067 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
8068 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
8069 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
8070 196,193,45,114,242,8, //vpslld $0x8,%ymm10,%ymm10
8071 196,65,45,235,201, //vpor %ymm9,%ymm10,%ymm9
8072 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
8073 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
8074 196,193,45,114,242,16, //vpslld $0x10,%ymm10,%ymm10
8075 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
8076 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
8077 196,193,61,114,240,24, //vpslld $0x18,%ymm8,%ymm8
8078 196,65,45,235,192, //vpor %ymm8,%ymm10,%ymm8
8079 196,65,53,235,192, //vpor %ymm8,%ymm9,%ymm8
8080 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008081 117,12, //jne b7e <_sk_store_8888_hsw+0x74>
Mike Klein894d5612017-03-07 07:59:52 -05008082 196,65,126,127,1, //vmovdqu %ymm8,(%r9)
8083 72,173, //lods %ds:(%rsi),%rax
8084 76,137,193, //mov %r8,%rcx
8085 255,224, //jmpq *%rax
8086 185,8,0,0,0, //mov $0x8,%ecx
8087 68,41,193, //sub %r8d,%ecx
8088 192,225,3, //shl $0x3,%cl
8089 72,199,192,255,255,255,255, //mov $0xffffffffffffffff,%rax
8090 72,211,232, //shr %cl,%rax
8091 196,97,249,110,200, //vmovq %rax,%xmm9
8092 196,66,125,33,201, //vpmovsxbd %xmm9,%ymm9
8093 196,66,53,142,1, //vpmaskmovd %ymm8,%ymm9,(%r9)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008094 235,211, //jmp b77 <_sk_store_8888_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05008095};
8096
8097CODE const uint8_t sk_load_f16_hsw[] = {
8098 72,173, //lods %ds:(%rsi),%rax
8099 72,139,0, //mov (%rax),%rax
8100 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008101 117,97, //jne c0f <_sk_load_f16_hsw+0x6b>
Mike Klein894d5612017-03-07 07:59:52 -05008102 197,249,16,12,248, //vmovupd (%rax,%rdi,8),%xmm1
8103 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
8104 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
8105 197,121,16,68,248,48, //vmovupd 0x30(%rax,%rdi,8),%xmm8
8106 197,241,97,194, //vpunpcklwd %xmm2,%xmm1,%xmm0
8107 197,241,105,202, //vpunpckhwd %xmm2,%xmm1,%xmm1
8108 196,193,97,97,208, //vpunpcklwd %xmm8,%xmm3,%xmm2
8109 196,193,97,105,216, //vpunpckhwd %xmm8,%xmm3,%xmm3
8110 197,121,97,193, //vpunpcklwd %xmm1,%xmm0,%xmm8
8111 197,121,105,201, //vpunpckhwd %xmm1,%xmm0,%xmm9
8112 197,233,97,203, //vpunpcklwd %xmm3,%xmm2,%xmm1
8113 197,233,105,219, //vpunpckhwd %xmm3,%xmm2,%xmm3
8114 197,185,108,193, //vpunpcklqdq %xmm1,%xmm8,%xmm0
8115 196,226,125,19,192, //vcvtph2ps %xmm0,%ymm0
8116 197,185,109,201, //vpunpckhqdq %xmm1,%xmm8,%xmm1
8117 196,226,125,19,201, //vcvtph2ps %xmm1,%ymm1
8118 197,177,108,211, //vpunpcklqdq %xmm3,%xmm9,%xmm2
8119 196,226,125,19,210, //vcvtph2ps %xmm2,%ymm2
8120 197,177,109,219, //vpunpckhqdq %xmm3,%xmm9,%xmm3
8121 196,226,125,19,219, //vcvtph2ps %xmm3,%ymm3
8122 72,173, //lods %ds:(%rsi),%rax
8123 255,224, //jmpq *%rax
8124 197,251,16,12,248, //vmovsd (%rax,%rdi,8),%xmm1
8125 196,65,57,87,192, //vxorpd %xmm8,%xmm8,%xmm8
8126 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008127 117,6, //jne c25 <_sk_load_f16_hsw+0x81>
Mike Klein894d5612017-03-07 07:59:52 -05008128 197,250,126,201, //vmovq %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008129 235,30, //jmp c43 <_sk_load_f16_hsw+0x9f>
Mike Klein894d5612017-03-07 07:59:52 -05008130 197,241,22,76,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm1,%xmm1
8131 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008132 114,18, //jb c43 <_sk_load_f16_hsw+0x9f>
Mike Klein894d5612017-03-07 07:59:52 -05008133 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
8134 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008135 117,19, //jne c50 <_sk_load_f16_hsw+0xac>
Mike Klein894d5612017-03-07 07:59:52 -05008136 197,250,126,210, //vmovq %xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008137 235,46, //jmp c71 <_sk_load_f16_hsw+0xcd>
Mike Klein894d5612017-03-07 07:59:52 -05008138 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
8139 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008140 233,117,255,255,255, //jmpq bc5 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008141 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
8142 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008143 114,21, //jb c71 <_sk_load_f16_hsw+0xcd>
Mike Klein894d5612017-03-07 07:59:52 -05008144 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
8145 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008146 117,18, //jne c7a <_sk_load_f16_hsw+0xd6>
Mike Klein894d5612017-03-07 07:59:52 -05008147 197,250,126,219, //vmovq %xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008148 233,84,255,255,255, //jmpq bc5 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008149 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008150 233,75,255,255,255, //jmpq bc5 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008151 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
8152 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008153 15,130,59,255,255,255, //jb bc5 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008154 197,123,16,68,248,48, //vmovsd 0x30(%rax,%rdi,8),%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008155 233,48,255,255,255, //jmpq bc5 <_sk_load_f16_hsw+0x21>
Mike Klein894d5612017-03-07 07:59:52 -05008156};
8157
8158CODE const uint8_t sk_store_f16_hsw[] = {
8159 72,173, //lods %ds:(%rsi),%rax
8160 72,139,0, //mov (%rax),%rax
8161 196,195,125,29,192,4, //vcvtps2ph $0x4,%ymm0,%xmm8
8162 196,195,125,29,201,4, //vcvtps2ph $0x4,%ymm1,%xmm9
8163 196,195,125,29,210,4, //vcvtps2ph $0x4,%ymm2,%xmm10
8164 196,195,125,29,219,4, //vcvtps2ph $0x4,%ymm3,%xmm11
8165 196,65,57,97,225, //vpunpcklwd %xmm9,%xmm8,%xmm12
8166 196,65,57,105,193, //vpunpckhwd %xmm9,%xmm8,%xmm8
8167 196,65,41,97,203, //vpunpcklwd %xmm11,%xmm10,%xmm9
8168 196,65,41,105,235, //vpunpckhwd %xmm11,%xmm10,%xmm13
8169 196,65,25,98,217, //vpunpckldq %xmm9,%xmm12,%xmm11
8170 196,65,25,106,209, //vpunpckhdq %xmm9,%xmm12,%xmm10
8171 196,65,57,98,205, //vpunpckldq %xmm13,%xmm8,%xmm9
8172 196,65,57,106,197, //vpunpckhdq %xmm13,%xmm8,%xmm8
8173 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008174 117,27, //jne cfa <_sk_store_f16_hsw+0x65>
Mike Klein894d5612017-03-07 07:59:52 -05008175 197,120,17,28,248, //vmovups %xmm11,(%rax,%rdi,8)
8176 197,120,17,84,248,16, //vmovups %xmm10,0x10(%rax,%rdi,8)
8177 197,120,17,76,248,32, //vmovups %xmm9,0x20(%rax,%rdi,8)
8178 197,122,127,68,248,48, //vmovdqu %xmm8,0x30(%rax,%rdi,8)
8179 72,173, //lods %ds:(%rsi),%rax
8180 255,224, //jmpq *%rax
8181 197,121,214,28,248, //vmovq %xmm11,(%rax,%rdi,8)
8182 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008183 116,241, //je cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008184 197,121,23,92,248,8, //vmovhpd %xmm11,0x8(%rax,%rdi,8)
8185 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008186 114,229, //jb cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008187 197,121,214,84,248,16, //vmovq %xmm10,0x10(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008188 116,221, //je cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008189 197,121,23,84,248,24, //vmovhpd %xmm10,0x18(%rax,%rdi,8)
8190 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008191 114,209, //jb cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008192 197,121,214,76,248,32, //vmovq %xmm9,0x20(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008193 116,201, //je cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008194 197,121,23,76,248,40, //vmovhpd %xmm9,0x28(%rax,%rdi,8)
8195 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008196 114,189, //jb cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008197 197,121,214,68,248,48, //vmovq %xmm8,0x30(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008198 235,181, //jmp cf6 <_sk_store_f16_hsw+0x61>
Mike Klein894d5612017-03-07 07:59:52 -05008199};
8200
8201CODE const uint8_t sk_store_f32_hsw[] = {
8202 72,173, //lods %ds:(%rsi),%rax
8203 76,139,0, //mov (%rax),%r8
8204 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
8205 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
8206 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
8207 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
8208 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
8209 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
8210 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
8211 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
8212 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
8213 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008214 117,55, //jne dae <_sk_store_f32_hsw+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05008215 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
8216 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
8217 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
8218 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
8219 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
8220 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
8221 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
8222 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
8223 72,173, //lods %ds:(%rsi),%rax
8224 255,224, //jmpq *%rax
8225 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
8226 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008227 116,240, //je daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008228 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
8229 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008230 114,227, //jb daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008231 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008232 116,218, //je daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008233 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
8234 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008235 114,205, //jb daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008236 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008237 116,195, //je daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008238 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
8239 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008240 114,181, //jb daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008241 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008242 235,171, //jmp daa <_sk_store_f32_hsw+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05008243};
8244
8245CODE const uint8_t sk_clamp_x_hsw[] = {
8246 72,173, //lods %ds:(%rsi),%rax
8247 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8248 197,188,95,192, //vmaxps %ymm0,%ymm8,%ymm0
8249 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
8250 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
8251 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
8252 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
8253 72,173, //lods %ds:(%rsi),%rax
8254 255,224, //jmpq *%rax
8255};
8256
8257CODE const uint8_t sk_clamp_y_hsw[] = {
8258 72,173, //lods %ds:(%rsi),%rax
8259 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8260 197,188,95,201, //vmaxps %ymm1,%ymm8,%ymm1
8261 196,98,125,88,0, //vpbroadcastd (%rax),%ymm8
8262 196,65,53,118,201, //vpcmpeqd %ymm9,%ymm9,%ymm9
8263 196,65,61,254,193, //vpaddd %ymm9,%ymm8,%ymm8
8264 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
8265 72,173, //lods %ds:(%rsi),%rax
8266 255,224, //jmpq *%rax
8267};
8268
8269CODE const uint8_t sk_repeat_x_hsw[] = {
8270 72,173, //lods %ds:(%rsi),%rax
8271 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8272 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
8273 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
8274 196,98,61,172,200, //vfnmadd213ps %ymm0,%ymm8,%ymm9
8275 197,253,118,192, //vpcmpeqd %ymm0,%ymm0,%ymm0
8276 197,189,254,192, //vpaddd %ymm0,%ymm8,%ymm0
8277 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
8278 72,173, //lods %ds:(%rsi),%rax
8279 255,224, //jmpq *%rax
8280};
8281
8282CODE const uint8_t sk_repeat_y_hsw[] = {
8283 72,173, //lods %ds:(%rsi),%rax
8284 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8285 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
8286 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
8287 196,98,61,172,201, //vfnmadd213ps %ymm1,%ymm8,%ymm9
8288 197,245,118,201, //vpcmpeqd %ymm1,%ymm1,%ymm1
8289 197,189,254,201, //vpaddd %ymm1,%ymm8,%ymm1
8290 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
8291 72,173, //lods %ds:(%rsi),%rax
8292 255,224, //jmpq *%rax
8293};
8294
8295CODE const uint8_t sk_mirror_x_hsw[] = {
8296 72,173, //lods %ds:(%rsi),%rax
8297 197,122,16,0, //vmovss (%rax),%xmm8
8298 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
8299 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
8300 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
8301 196,226,125,24,192, //vbroadcastss %xmm0,%ymm0
8302 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
8303 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
8304 196,66,125,172,194, //vfnmadd213ps %ymm10,%ymm0,%ymm8
8305 196,193,60,92,193, //vsubps %ymm9,%ymm8,%ymm0
8306 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8307 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
8308 197,188,84,192, //vandps %ymm0,%ymm8,%ymm0
8309 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
8310 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
8311 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
8312 72,173, //lods %ds:(%rsi),%rax
8313 255,224, //jmpq *%rax
8314};
8315
8316CODE const uint8_t sk_mirror_y_hsw[] = {
8317 72,173, //lods %ds:(%rsi),%rax
8318 197,122,16,0, //vmovss (%rax),%xmm8
8319 196,66,125,24,200, //vbroadcastss %xmm8,%ymm9
8320 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
8321 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
8322 196,226,125,24,201, //vbroadcastss %xmm1,%ymm1
8323 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
8324 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
8325 196,66,117,172,194, //vfnmadd213ps %ymm10,%ymm1,%ymm8
8326 196,193,60,92,201, //vsubps %ymm9,%ymm8,%ymm1
8327 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8328 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
8329 197,188,84,201, //vandps %ymm1,%ymm8,%ymm1
8330 196,65,61,118,192, //vpcmpeqd %ymm8,%ymm8,%ymm8
8331 196,65,53,254,192, //vpaddd %ymm8,%ymm9,%ymm8
8332 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
8333 72,173, //lods %ds:(%rsi),%rax
8334 255,224, //jmpq *%rax
8335};
8336
Mike Kleine9ed07d2017-03-07 12:28:11 -05008337CODE const uint8_t sk_luminance_to_alpha_hsw[] = {
8338 196,98,125,24,130,136,0,0,0, //vbroadcastss 0x88(%rdx),%ymm8
8339 196,226,125,24,154,140,0,0,0, //vbroadcastss 0x8c(%rdx),%ymm3
8340 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
8341 196,98,125,168,193, //vfmadd213ps %ymm1,%ymm0,%ymm8
8342 196,226,125,24,154,144,0,0,0, //vbroadcastss 0x90(%rdx),%ymm3
8343 196,194,109,168,216, //vfmadd213ps %ymm8,%ymm2,%ymm3
8344 72,173, //lods %ds:(%rsi),%rax
8345 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8346 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8347 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8348 255,224, //jmpq *%rax
8349};
8350
Mike Klein894d5612017-03-07 07:59:52 -05008351CODE const uint8_t sk_matrix_2x3_hsw[] = {
8352 72,173, //lods %ds:(%rsi),%rax
8353 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
8354 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
8355 196,98,125,24,64,16, //vbroadcastss 0x10(%rax),%ymm8
8356 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
8357 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
8358 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
8359 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
8360 196,98,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm9
8361 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
8362 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
8363 72,173, //lods %ds:(%rsi),%rax
8364 197,124,41,192, //vmovaps %ymm8,%ymm0
8365 197,124,41,201, //vmovaps %ymm9,%ymm1
8366 255,224, //jmpq *%rax
8367};
8368
8369CODE const uint8_t sk_matrix_3x4_hsw[] = {
8370 72,173, //lods %ds:(%rsi),%rax
8371 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
8372 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
8373 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
8374 196,98,125,24,64,36, //vbroadcastss 0x24(%rax),%ymm8
8375 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
8376 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
8377 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
8378 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
8379 196,98,125,24,88,16, //vbroadcastss 0x10(%rax),%ymm11
8380 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
8381 196,98,125,24,72,40, //vbroadcastss 0x28(%rax),%ymm9
8382 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
8383 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
8384 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
8385 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
8386 196,98,125,24,96,20, //vbroadcastss 0x14(%rax),%ymm12
8387 196,98,125,24,104,32, //vbroadcastss 0x20(%rax),%ymm13
8388 196,98,125,24,80,44, //vbroadcastss 0x2c(%rax),%ymm10
8389 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
8390 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
8391 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
8392 72,173, //lods %ds:(%rsi),%rax
8393 197,124,41,192, //vmovaps %ymm8,%ymm0
8394 197,124,41,201, //vmovaps %ymm9,%ymm1
8395 197,124,41,210, //vmovaps %ymm10,%ymm2
8396 255,224, //jmpq *%rax
8397};
8398
Mike Kleine9ed07d2017-03-07 12:28:11 -05008399CODE const uint8_t sk_matrix_4x5_hsw[] = {
8400 72,173, //lods %ds:(%rsi),%rax
8401 196,98,125,24,8, //vbroadcastss (%rax),%ymm9
8402 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
8403 196,98,125,24,88,32, //vbroadcastss 0x20(%rax),%ymm11
8404 196,98,125,24,96,48, //vbroadcastss 0x30(%rax),%ymm12
8405 196,98,125,24,64,64, //vbroadcastss 0x40(%rax),%ymm8
8406 196,66,101,184,196, //vfmadd231ps %ymm12,%ymm3,%ymm8
8407 196,66,109,184,195, //vfmadd231ps %ymm11,%ymm2,%ymm8
8408 196,66,117,184,194, //vfmadd231ps %ymm10,%ymm1,%ymm8
8409 196,66,125,184,193, //vfmadd231ps %ymm9,%ymm0,%ymm8
8410 196,98,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm10
8411 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
8412 196,98,125,24,96,36, //vbroadcastss 0x24(%rax),%ymm12
8413 196,98,125,24,104,52, //vbroadcastss 0x34(%rax),%ymm13
8414 196,98,125,24,72,68, //vbroadcastss 0x44(%rax),%ymm9
8415 196,66,101,184,205, //vfmadd231ps %ymm13,%ymm3,%ymm9
8416 196,66,109,184,204, //vfmadd231ps %ymm12,%ymm2,%ymm9
8417 196,66,117,184,203, //vfmadd231ps %ymm11,%ymm1,%ymm9
8418 196,66,125,184,202, //vfmadd231ps %ymm10,%ymm0,%ymm9
8419 196,98,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm11
8420 196,98,125,24,96,24, //vbroadcastss 0x18(%rax),%ymm12
8421 196,98,125,24,104,40, //vbroadcastss 0x28(%rax),%ymm13
8422 196,98,125,24,112,56, //vbroadcastss 0x38(%rax),%ymm14
8423 196,98,125,24,80,72, //vbroadcastss 0x48(%rax),%ymm10
8424 196,66,101,184,214, //vfmadd231ps %ymm14,%ymm3,%ymm10
8425 196,66,109,184,213, //vfmadd231ps %ymm13,%ymm2,%ymm10
8426 196,66,117,184,212, //vfmadd231ps %ymm12,%ymm1,%ymm10
8427 196,66,125,184,211, //vfmadd231ps %ymm11,%ymm0,%ymm10
8428 196,98,125,24,96,12, //vbroadcastss 0xc(%rax),%ymm12
8429 196,98,125,24,104,28, //vbroadcastss 0x1c(%rax),%ymm13
8430 196,98,125,24,112,44, //vbroadcastss 0x2c(%rax),%ymm14
8431 196,98,125,24,120,60, //vbroadcastss 0x3c(%rax),%ymm15
8432 196,98,125,24,88,76, //vbroadcastss 0x4c(%rax),%ymm11
8433 196,66,101,184,223, //vfmadd231ps %ymm15,%ymm3,%ymm11
8434 196,66,109,184,222, //vfmadd231ps %ymm14,%ymm2,%ymm11
8435 196,66,117,184,221, //vfmadd231ps %ymm13,%ymm1,%ymm11
8436 196,66,125,184,220, //vfmadd231ps %ymm12,%ymm0,%ymm11
8437 72,173, //lods %ds:(%rsi),%rax
8438 197,124,41,192, //vmovaps %ymm8,%ymm0
8439 197,124,41,201, //vmovaps %ymm9,%ymm1
8440 197,124,41,210, //vmovaps %ymm10,%ymm2
8441 197,124,41,219, //vmovaps %ymm11,%ymm3
8442 255,224, //jmpq *%rax
8443};
8444
Mike Klein894d5612017-03-07 07:59:52 -05008445CODE const uint8_t sk_matrix_perspective_hsw[] = {
8446 72,173, //lods %ds:(%rsi),%rax
8447 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8448 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
8449 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
8450 196,66,117,184,209, //vfmadd231ps %ymm9,%ymm1,%ymm10
8451 196,66,125,184,208, //vfmadd231ps %ymm8,%ymm0,%ymm10
8452 196,98,125,24,64,12, //vbroadcastss 0xc(%rax),%ymm8
8453 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
8454 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
8455 196,66,117,184,217, //vfmadd231ps %ymm9,%ymm1,%ymm11
8456 196,66,125,184,216, //vfmadd231ps %ymm8,%ymm0,%ymm11
8457 196,98,125,24,64,24, //vbroadcastss 0x18(%rax),%ymm8
8458 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
8459 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
8460 196,66,117,184,225, //vfmadd231ps %ymm9,%ymm1,%ymm12
8461 196,66,125,184,224, //vfmadd231ps %ymm8,%ymm0,%ymm12
8462 196,193,124,83,204, //vrcpps %ymm12,%ymm1
8463 197,172,89,193, //vmulps %ymm1,%ymm10,%ymm0
8464 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
8465 72,173, //lods %ds:(%rsi),%rax
8466 255,224, //jmpq *%rax
8467};
8468
8469CODE const uint8_t sk_linear_gradient_2stops_hsw[] = {
8470 72,173, //lods %ds:(%rsi),%rax
8471 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
8472 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8473 196,98,125,184,193, //vfmadd231ps %ymm1,%ymm0,%ymm8
8474 196,226,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm2
8475 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
8476 196,226,125,184,202, //vfmadd231ps %ymm2,%ymm0,%ymm1
8477 196,226,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm3
8478 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
8479 196,226,125,184,211, //vfmadd231ps %ymm3,%ymm0,%ymm2
8480 196,98,125,24,72,28, //vbroadcastss 0x1c(%rax),%ymm9
8481 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
8482 196,194,125,184,217, //vfmadd231ps %ymm9,%ymm0,%ymm3
8483 72,173, //lods %ds:(%rsi),%rax
8484 197,124,41,192, //vmovaps %ymm8,%ymm0
8485 255,224, //jmpq *%rax
8486};
8487
8488CODE const uint8_t sk_start_pipeline_avx[] = {
8489 65,87, //push %r15
8490 65,86, //push %r14
8491 65,85, //push %r13
8492 65,84, //push %r12
8493 86, //push %rsi
8494 87, //push %rdi
8495 83, //push %rbx
8496 72,129,236,160,0,0,0, //sub $0xa0,%rsp
8497 197,120,41,188,36,144,0,0,0, //vmovaps %xmm15,0x90(%rsp)
8498 197,120,41,180,36,128,0,0,0, //vmovaps %xmm14,0x80(%rsp)
8499 197,120,41,108,36,112, //vmovaps %xmm13,0x70(%rsp)
8500 197,120,41,100,36,96, //vmovaps %xmm12,0x60(%rsp)
8501 197,120,41,92,36,80, //vmovaps %xmm11,0x50(%rsp)
8502 197,120,41,84,36,64, //vmovaps %xmm10,0x40(%rsp)
8503 197,120,41,76,36,48, //vmovaps %xmm9,0x30(%rsp)
8504 197,120,41,68,36,32, //vmovaps %xmm8,0x20(%rsp)
8505 197,248,41,124,36,16, //vmovaps %xmm7,0x10(%rsp)
8506 197,248,41,52,36, //vmovaps %xmm6,(%rsp)
8507 77,137,205, //mov %r9,%r13
8508 77,137,198, //mov %r8,%r14
8509 72,137,203, //mov %rcx,%rbx
8510 72,137,214, //mov %rdx,%rsi
8511 72,173, //lods %ds:(%rsi),%rax
8512 73,137,199, //mov %rax,%r15
8513 73,137,244, //mov %rsi,%r12
8514 72,141,67,8, //lea 0x8(%rbx),%rax
8515 76,57,232, //cmp %r13,%rax
8516 118,5, //jbe 75 <_sk_start_pipeline_avx+0x75>
8517 72,137,223, //mov %rbx,%rdi
8518 235,65, //jmp b6 <_sk_start_pipeline_avx+0xb6>
8519 185,0,0,0,0, //mov $0x0,%ecx
8520 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8521 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8522 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8523 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8524 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
8525 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
8526 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
8527 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
8528 72,137,223, //mov %rbx,%rdi
8529 76,137,230, //mov %r12,%rsi
8530 76,137,242, //mov %r14,%rdx
8531 65,255,215, //callq *%r15
8532 72,141,123,8, //lea 0x8(%rbx),%rdi
8533 72,131,195,16, //add $0x10,%rbx
8534 76,57,235, //cmp %r13,%rbx
8535 72,137,251, //mov %rdi,%rbx
8536 118,191, //jbe 75 <_sk_start_pipeline_avx+0x75>
8537 76,137,233, //mov %r13,%rcx
8538 72,41,249, //sub %rdi,%rcx
8539 116,41, //je e7 <_sk_start_pipeline_avx+0xe7>
8540 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8541 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8542 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8543 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8544 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
8545 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
8546 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
8547 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
8548 76,137,230, //mov %r12,%rsi
8549 76,137,242, //mov %r14,%rdx
8550 65,255,215, //callq *%r15
8551 76,137,232, //mov %r13,%rax
8552 197,248,40,52,36, //vmovaps (%rsp),%xmm6
8553 197,248,40,124,36,16, //vmovaps 0x10(%rsp),%xmm7
8554 197,120,40,68,36,32, //vmovaps 0x20(%rsp),%xmm8
8555 197,120,40,76,36,48, //vmovaps 0x30(%rsp),%xmm9
8556 197,120,40,84,36,64, //vmovaps 0x40(%rsp),%xmm10
8557 197,120,40,92,36,80, //vmovaps 0x50(%rsp),%xmm11
8558 197,120,40,100,36,96, //vmovaps 0x60(%rsp),%xmm12
8559 197,120,40,108,36,112, //vmovaps 0x70(%rsp),%xmm13
8560 197,120,40,180,36,128,0,0,0, //vmovaps 0x80(%rsp),%xmm14
8561 197,120,40,188,36,144,0,0,0, //vmovaps 0x90(%rsp),%xmm15
8562 72,129,196,160,0,0,0, //add $0xa0,%rsp
8563 91, //pop %rbx
8564 95, //pop %rdi
8565 94, //pop %rsi
8566 65,92, //pop %r12
8567 65,93, //pop %r13
8568 65,94, //pop %r14
8569 65,95, //pop %r15
8570 197,248,119, //vzeroupper
8571 195, //retq
8572};
8573
8574CODE const uint8_t sk_just_return_avx[] = {
8575 195, //retq
8576};
8577
8578CODE const uint8_t sk_seed_shader_avx[] = {
8579 72,173, //lods %ds:(%rsi),%rax
8580 197,249,110,199, //vmovd %edi,%xmm0
8581 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
8582 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
8583 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008584 65,184,0,0,0,63, //mov $0x3f000000,%r8d
8585 196,193,121,110,200, //vmovd %r8d,%xmm1
8586 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
8587 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05008588 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
8589 197,252,88,66,20, //vaddps 0x14(%rdx),%ymm0,%ymm0
8590 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
8591 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
8592 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008593 184,0,0,128,63, //mov $0x3f800000,%eax
8594 197,249,110,208, //vmovd %eax,%xmm2
8595 196,227,121,4,210,0, //vpermilps $0x0,%xmm2,%xmm2
8596 196,227,109,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05008597 72,173, //lods %ds:(%rsi),%rax
8598 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8599 197,220,87,228, //vxorps %ymm4,%ymm4,%ymm4
8600 197,212,87,237, //vxorps %ymm5,%ymm5,%ymm5
8601 197,204,87,246, //vxorps %ymm6,%ymm6,%ymm6
8602 197,196,87,255, //vxorps %ymm7,%ymm7,%ymm7
8603 255,224, //jmpq *%rax
8604};
8605
8606CODE const uint8_t sk_constant_color_avx[] = {
8607 72,173, //lods %ds:(%rsi),%rax
8608 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
8609 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
8610 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
8611 196,226,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm3
8612 72,173, //lods %ds:(%rsi),%rax
8613 255,224, //jmpq *%rax
8614};
8615
8616CODE const uint8_t sk_clear_avx[] = {
8617 72,173, //lods %ds:(%rsi),%rax
8618 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
8619 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
8620 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
8621 197,228,87,219, //vxorps %ymm3,%ymm3,%ymm3
8622 255,224, //jmpq *%rax
8623};
8624
8625CODE const uint8_t sk_plus__avx[] = {
8626 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
8627 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
8628 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
8629 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
8630 72,173, //lods %ds:(%rsi),%rax
8631 255,224, //jmpq *%rax
8632};
8633
8634CODE const uint8_t sk_srcover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008635 184,0,0,128,63, //mov $0x3f800000,%eax
8636 197,121,110,192, //vmovd %eax,%xmm8
8637 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
8638 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008639 197,60,92,195, //vsubps %ymm3,%ymm8,%ymm8
8640 197,60,89,204, //vmulps %ymm4,%ymm8,%ymm9
8641 197,180,88,192, //vaddps %ymm0,%ymm9,%ymm0
8642 197,60,89,205, //vmulps %ymm5,%ymm8,%ymm9
8643 197,180,88,201, //vaddps %ymm1,%ymm9,%ymm1
8644 197,60,89,206, //vmulps %ymm6,%ymm8,%ymm9
8645 197,180,88,210, //vaddps %ymm2,%ymm9,%ymm2
8646 197,60,89,199, //vmulps %ymm7,%ymm8,%ymm8
8647 197,188,88,219, //vaddps %ymm3,%ymm8,%ymm3
8648 72,173, //lods %ds:(%rsi),%rax
8649 255,224, //jmpq *%rax
8650};
8651
8652CODE const uint8_t sk_dstover_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008653 184,0,0,128,63, //mov $0x3f800000,%eax
8654 197,121,110,192, //vmovd %eax,%xmm8
8655 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
8656 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008657 197,60,92,199, //vsubps %ymm7,%ymm8,%ymm8
8658 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
8659 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
8660 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
8661 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
8662 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
8663 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
8664 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
8665 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
8666 72,173, //lods %ds:(%rsi),%rax
8667 255,224, //jmpq *%rax
8668};
8669
8670CODE const uint8_t sk_clamp_0_avx[] = {
8671 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8672 196,193,124,95,192, //vmaxps %ymm8,%ymm0,%ymm0
8673 196,193,116,95,200, //vmaxps %ymm8,%ymm1,%ymm1
8674 196,193,108,95,208, //vmaxps %ymm8,%ymm2,%ymm2
8675 196,193,100,95,216, //vmaxps %ymm8,%ymm3,%ymm3
8676 72,173, //lods %ds:(%rsi),%rax
8677 255,224, //jmpq *%rax
8678};
8679
8680CODE const uint8_t sk_clamp_1_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008681 184,0,0,128,63, //mov $0x3f800000,%eax
8682 197,121,110,192, //vmovd %eax,%xmm8
8683 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
8684 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008685 196,193,124,93,192, //vminps %ymm8,%ymm0,%ymm0
8686 196,193,116,93,200, //vminps %ymm8,%ymm1,%ymm1
8687 196,193,108,93,208, //vminps %ymm8,%ymm2,%ymm2
8688 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
8689 72,173, //lods %ds:(%rsi),%rax
8690 255,224, //jmpq *%rax
8691};
8692
8693CODE const uint8_t sk_clamp_a_avx[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008694 184,0,0,128,63, //mov $0x3f800000,%eax
8695 197,121,110,192, //vmovd %eax,%xmm8
8696 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
8697 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05008698 196,193,100,93,216, //vminps %ymm8,%ymm3,%ymm3
8699 197,252,93,195, //vminps %ymm3,%ymm0,%ymm0
8700 197,244,93,203, //vminps %ymm3,%ymm1,%ymm1
8701 197,236,93,211, //vminps %ymm3,%ymm2,%ymm2
8702 72,173, //lods %ds:(%rsi),%rax
8703 255,224, //jmpq *%rax
8704};
8705
8706CODE const uint8_t sk_set_rgb_avx[] = {
8707 72,173, //lods %ds:(%rsi),%rax
8708 196,226,125,24,0, //vbroadcastss (%rax),%ymm0
8709 196,226,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm1
8710 196,226,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm2
8711 72,173, //lods %ds:(%rsi),%rax
8712 255,224, //jmpq *%rax
8713};
8714
8715CODE const uint8_t sk_swap_rb_avx[] = {
8716 197,124,40,192, //vmovaps %ymm0,%ymm8
8717 72,173, //lods %ds:(%rsi),%rax
8718 197,252,40,194, //vmovaps %ymm2,%ymm0
8719 197,124,41,194, //vmovaps %ymm8,%ymm2
8720 255,224, //jmpq *%rax
8721};
8722
8723CODE const uint8_t sk_swap_avx[] = {
8724 197,124,40,195, //vmovaps %ymm3,%ymm8
8725 197,124,40,202, //vmovaps %ymm2,%ymm9
8726 197,124,40,209, //vmovaps %ymm1,%ymm10
8727 197,124,40,216, //vmovaps %ymm0,%ymm11
8728 72,173, //lods %ds:(%rsi),%rax
8729 197,252,40,196, //vmovaps %ymm4,%ymm0
8730 197,252,40,205, //vmovaps %ymm5,%ymm1
8731 197,252,40,214, //vmovaps %ymm6,%ymm2
8732 197,252,40,223, //vmovaps %ymm7,%ymm3
8733 197,124,41,220, //vmovaps %ymm11,%ymm4
8734 197,124,41,213, //vmovaps %ymm10,%ymm5
8735 197,124,41,206, //vmovaps %ymm9,%ymm6
8736 197,124,41,199, //vmovaps %ymm8,%ymm7
8737 255,224, //jmpq *%rax
8738};
8739
8740CODE const uint8_t sk_move_src_dst_avx[] = {
8741 72,173, //lods %ds:(%rsi),%rax
8742 197,252,40,224, //vmovaps %ymm0,%ymm4
8743 197,252,40,233, //vmovaps %ymm1,%ymm5
8744 197,252,40,242, //vmovaps %ymm2,%ymm6
8745 197,252,40,251, //vmovaps %ymm3,%ymm7
8746 255,224, //jmpq *%rax
8747};
8748
8749CODE const uint8_t sk_move_dst_src_avx[] = {
8750 72,173, //lods %ds:(%rsi),%rax
8751 197,252,40,196, //vmovaps %ymm4,%ymm0
8752 197,252,40,205, //vmovaps %ymm5,%ymm1
8753 197,252,40,214, //vmovaps %ymm6,%ymm2
8754 197,252,40,223, //vmovaps %ymm7,%ymm3
8755 255,224, //jmpq *%rax
8756};
8757
8758CODE const uint8_t sk_premul_avx[] = {
8759 197,252,89,195, //vmulps %ymm3,%ymm0,%ymm0
8760 197,244,89,203, //vmulps %ymm3,%ymm1,%ymm1
8761 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
8762 72,173, //lods %ds:(%rsi),%rax
8763 255,224, //jmpq *%rax
8764};
8765
8766CODE const uint8_t sk_unpremul_avx[] = {
8767 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
8768 196,65,100,194,200,0, //vcmpeqps %ymm8,%ymm3,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008769 184,0,0,128,63, //mov $0x3f800000,%eax
8770 197,121,110,208, //vmovd %eax,%xmm10
8771 196,67,121,4,210,0, //vpermilps $0x0,%xmm10,%xmm10
8772 196,67,45,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
Mike Klein894d5612017-03-07 07:59:52 -05008773 197,44,94,211, //vdivps %ymm3,%ymm10,%ymm10
8774 196,67,45,74,192,144, //vblendvps %ymm9,%ymm8,%ymm10,%ymm8
8775 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
8776 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
8777 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
8778 72,173, //lods %ds:(%rsi),%rax
8779 255,224, //jmpq *%rax
8780};
8781
8782CODE const uint8_t sk_from_srgb_avx[] = {
8783 196,98,125,24,66,64, //vbroadcastss 0x40(%rdx),%ymm8
8784 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
8785 197,124,89,208, //vmulps %ymm0,%ymm0,%ymm10
8786 196,98,125,24,90,60, //vbroadcastss 0x3c(%rdx),%ymm11
8787 196,98,125,24,98,56, //vbroadcastss 0x38(%rdx),%ymm12
8788 197,36,89,232, //vmulps %ymm0,%ymm11,%ymm13
8789 196,65,20,88,236, //vaddps %ymm12,%ymm13,%ymm13
8790 196,98,125,24,114,52, //vbroadcastss 0x34(%rdx),%ymm14
8791 196,65,44,89,213, //vmulps %ymm13,%ymm10,%ymm10
8792 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
8793 196,98,125,24,106,68, //vbroadcastss 0x44(%rdx),%ymm13
8794 196,193,124,194,197,1, //vcmpltps %ymm13,%ymm0,%ymm0
8795 196,195,45,74,193,0, //vblendvps %ymm0,%ymm9,%ymm10,%ymm0
8796 197,60,89,201, //vmulps %ymm1,%ymm8,%ymm9
8797 197,116,89,209, //vmulps %ymm1,%ymm1,%ymm10
8798 197,36,89,249, //vmulps %ymm1,%ymm11,%ymm15
8799 196,65,4,88,252, //vaddps %ymm12,%ymm15,%ymm15
8800 196,65,44,89,215, //vmulps %ymm15,%ymm10,%ymm10
8801 196,65,12,88,210, //vaddps %ymm10,%ymm14,%ymm10
8802 196,193,116,194,205,1, //vcmpltps %ymm13,%ymm1,%ymm1
8803 196,195,45,74,201,16, //vblendvps %ymm1,%ymm9,%ymm10,%ymm1
8804 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
8805 197,108,89,202, //vmulps %ymm2,%ymm2,%ymm9
8806 197,36,89,210, //vmulps %ymm2,%ymm11,%ymm10
8807 196,65,44,88,212, //vaddps %ymm12,%ymm10,%ymm10
8808 196,65,52,89,202, //vmulps %ymm10,%ymm9,%ymm9
8809 196,65,12,88,201, //vaddps %ymm9,%ymm14,%ymm9
8810 196,193,108,194,213,1, //vcmpltps %ymm13,%ymm2,%ymm2
8811 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
8812 72,173, //lods %ds:(%rsi),%rax
8813 255,224, //jmpq *%rax
8814};
8815
8816CODE const uint8_t sk_to_srgb_avx[] = {
8817 197,124,82,192, //vrsqrtps %ymm0,%ymm8
8818 196,65,124,83,200, //vrcpps %ymm8,%ymm9
8819 196,65,124,82,208, //vrsqrtps %ymm8,%ymm10
8820 196,98,125,24,66,72, //vbroadcastss 0x48(%rdx),%ymm8
8821 197,60,89,216, //vmulps %ymm0,%ymm8,%ymm11
8822 196,98,125,24,34, //vbroadcastss (%rdx),%ymm12
8823 196,98,125,24,106,76, //vbroadcastss 0x4c(%rdx),%ymm13
8824 196,98,125,24,114,80, //vbroadcastss 0x50(%rdx),%ymm14
8825 196,98,125,24,122,84, //vbroadcastss 0x54(%rdx),%ymm15
8826 196,65,52,89,206, //vmulps %ymm14,%ymm9,%ymm9
8827 196,65,52,88,207, //vaddps %ymm15,%ymm9,%ymm9
8828 196,65,44,89,213, //vmulps %ymm13,%ymm10,%ymm10
8829 196,65,44,88,201, //vaddps %ymm9,%ymm10,%ymm9
8830 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
8831 196,98,125,24,82,88, //vbroadcastss 0x58(%rdx),%ymm10
8832 196,193,124,194,194,1, //vcmpltps %ymm10,%ymm0,%ymm0
8833 196,195,53,74,195,0, //vblendvps %ymm0,%ymm11,%ymm9,%ymm0
8834 197,124,82,201, //vrsqrtps %ymm1,%ymm9
8835 196,65,124,83,217, //vrcpps %ymm9,%ymm11
8836 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
8837 196,65,12,89,219, //vmulps %ymm11,%ymm14,%ymm11
8838 196,65,4,88,219, //vaddps %ymm11,%ymm15,%ymm11
8839 196,65,20,89,201, //vmulps %ymm9,%ymm13,%ymm9
8840 196,65,52,88,203, //vaddps %ymm11,%ymm9,%ymm9
8841 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
8842 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
8843 196,193,116,194,202,1, //vcmpltps %ymm10,%ymm1,%ymm1
8844 196,195,53,74,203,16, //vblendvps %ymm1,%ymm11,%ymm9,%ymm1
8845 197,124,82,202, //vrsqrtps %ymm2,%ymm9
8846 196,65,124,83,217, //vrcpps %ymm9,%ymm11
8847 196,65,12,89,219, //vmulps %ymm11,%ymm14,%ymm11
8848 196,65,4,88,219, //vaddps %ymm11,%ymm15,%ymm11
8849 196,65,124,82,201, //vrsqrtps %ymm9,%ymm9
8850 196,65,20,89,201, //vmulps %ymm9,%ymm13,%ymm9
8851 196,65,52,88,203, //vaddps %ymm11,%ymm9,%ymm9
8852 196,65,28,93,201, //vminps %ymm9,%ymm12,%ymm9
8853 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
8854 196,193,108,194,210,1, //vcmpltps %ymm10,%ymm2,%ymm2
8855 196,195,53,74,208,32, //vblendvps %ymm2,%ymm8,%ymm9,%ymm2
8856 72,173, //lods %ds:(%rsi),%rax
8857 255,224, //jmpq *%rax
8858};
8859
8860CODE const uint8_t sk_scale_1_float_avx[] = {
8861 72,173, //lods %ds:(%rsi),%rax
8862 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8863 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
8864 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
8865 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
8866 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
8867 72,173, //lods %ds:(%rsi),%rax
8868 255,224, //jmpq *%rax
8869};
8870
8871CODE const uint8_t sk_scale_u8_avx[] = {
8872 73,137,200, //mov %rcx,%r8
8873 72,173, //lods %ds:(%rsi),%rax
8874 72,139,0, //mov (%rax),%rax
8875 72,1,248, //add %rdi,%rax
8876 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008877 117,80, //jne 58f <_sk_scale_u8_avx+0x60>
Mike Klein894d5612017-03-07 07:59:52 -05008878 197,123,16,0, //vmovsd (%rax),%xmm8
8879 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
8880 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
8881 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
8882 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
8883 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008884 184,129,128,128,59, //mov $0x3b808081,%eax
8885 197,121,110,200, //vmovd %eax,%xmm9
8886 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
8887 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05008888 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
8889 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
8890 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
8891 197,188,89,210, //vmulps %ymm2,%ymm8,%ymm2
8892 197,188,89,219, //vmulps %ymm3,%ymm8,%ymm3
8893 72,173, //lods %ds:(%rsi),%rax
8894 76,137,193, //mov %r8,%rcx
8895 255,224, //jmpq *%rax
8896 49,201, //xor %ecx,%ecx
8897 77,137,194, //mov %r8,%r10
8898 69,49,201, //xor %r9d,%r9d
8899 68,15,182,24, //movzbl (%rax),%r11d
8900 72,255,192, //inc %rax
8901 73,211,227, //shl %cl,%r11
8902 77,9,217, //or %r11,%r9
8903 72,131,193,8, //add $0x8,%rcx
8904 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008905 117,234, //jne 597 <_sk_scale_u8_avx+0x68>
Mike Klein894d5612017-03-07 07:59:52 -05008906 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008907 235,143, //jmp 543 <_sk_scale_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05008908};
8909
8910CODE const uint8_t sk_lerp_1_float_avx[] = {
8911 72,173, //lods %ds:(%rsi),%rax
8912 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
8913 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
8914 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
8915 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
8916 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
8917 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
8918 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
8919 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
8920 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
8921 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
8922 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
8923 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
8924 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
8925 72,173, //lods %ds:(%rsi),%rax
8926 255,224, //jmpq *%rax
8927};
8928
8929CODE const uint8_t sk_lerp_u8_avx[] = {
8930 73,137,200, //mov %rcx,%r8
8931 72,173, //lods %ds:(%rsi),%rax
8932 72,139,0, //mov (%rax),%rax
8933 72,1,248, //add %rdi,%rax
8934 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008935 117,116, //jne 677 <_sk_lerp_u8_avx+0x84>
Mike Klein894d5612017-03-07 07:59:52 -05008936 197,123,16,0, //vmovsd (%rax),%xmm8
8937 196,66,121,49,200, //vpmovzxbd %xmm8,%xmm9
8938 196,67,121,4,192,229, //vpermilps $0xe5,%xmm8,%xmm8
8939 196,66,121,49,192, //vpmovzxbd %xmm8,%xmm8
8940 196,67,53,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
8941 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008942 184,129,128,128,59, //mov $0x3b808081,%eax
8943 197,121,110,200, //vmovd %eax,%xmm9
8944 196,67,121,4,201,0, //vpermilps $0x0,%xmm9,%xmm9
8945 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
Mike Klein894d5612017-03-07 07:59:52 -05008946 196,65,60,89,193, //vmulps %ymm9,%ymm8,%ymm8
8947 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
8948 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
8949 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
8950 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
8951 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
8952 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
8953 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
8954 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
8955 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
8956 197,228,92,223, //vsubps %ymm7,%ymm3,%ymm3
8957 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
8958 197,228,88,223, //vaddps %ymm7,%ymm3,%ymm3
8959 72,173, //lods %ds:(%rsi),%rax
8960 76,137,193, //mov %r8,%rcx
8961 255,224, //jmpq *%rax
8962 49,201, //xor %ecx,%ecx
8963 77,137,194, //mov %r8,%r10
8964 69,49,201, //xor %r9d,%r9d
8965 68,15,182,24, //movzbl (%rax),%r11d
8966 72,255,192, //inc %rax
8967 73,211,227, //shl %cl,%r11
8968 77,9,217, //or %r11,%r9
8969 72,131,193,8, //add $0x8,%rcx
8970 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008971 117,234, //jne 67f <_sk_lerp_u8_avx+0x8c>
Mike Klein894d5612017-03-07 07:59:52 -05008972 196,65,249,110,193, //vmovq %r9,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008973 233,104,255,255,255, //jmpq 607 <_sk_lerp_u8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05008974};
8975
8976CODE const uint8_t sk_lerp_565_avx[] = {
8977 72,173, //lods %ds:(%rsi),%rax
8978 76,139,16, //mov (%rax),%r10
8979 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05008980 15,133,164,0,0,0, //jne 751 <_sk_lerp_565_avx+0xb2>
Mike Klein894d5612017-03-07 07:59:52 -05008981 196,65,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm8
8982 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
8983 197,185,105,219, //vpunpckhwd %xmm3,%xmm8,%xmm3
8984 196,66,121,51,192, //vpmovzxwd %xmm8,%xmm8
8985 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
8986 196,98,125,24,66,104, //vbroadcastss 0x68(%rdx),%ymm8
8987 197,60,84,195, //vandps %ymm3,%ymm8,%ymm8
8988 196,65,124,91,192, //vcvtdq2ps %ymm8,%ymm8
8989 196,98,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm9
8990 196,65,52,89,192, //vmulps %ymm8,%ymm9,%ymm8
8991 196,98,125,24,74,108, //vbroadcastss 0x6c(%rdx),%ymm9
8992 197,52,84,203, //vandps %ymm3,%ymm9,%ymm9
8993 196,65,124,91,201, //vcvtdq2ps %ymm9,%ymm9
8994 196,98,125,24,82,120, //vbroadcastss 0x78(%rdx),%ymm10
8995 196,65,44,89,201, //vmulps %ymm9,%ymm10,%ymm9
8996 196,98,125,24,82,112, //vbroadcastss 0x70(%rdx),%ymm10
8997 197,172,84,219, //vandps %ymm3,%ymm10,%ymm3
8998 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
8999 196,98,125,24,82,124, //vbroadcastss 0x7c(%rdx),%ymm10
9000 197,172,89,219, //vmulps %ymm3,%ymm10,%ymm3
9001 197,252,92,196, //vsubps %ymm4,%ymm0,%ymm0
9002 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
9003 197,252,88,196, //vaddps %ymm4,%ymm0,%ymm0
9004 197,244,92,205, //vsubps %ymm5,%ymm1,%ymm1
9005 196,193,116,89,201, //vmulps %ymm9,%ymm1,%ymm1
9006 197,244,88,205, //vaddps %ymm5,%ymm1,%ymm1
9007 197,236,92,214, //vsubps %ymm6,%ymm2,%ymm2
9008 197,236,89,211, //vmulps %ymm3,%ymm2,%ymm2
9009 197,236,88,214, //vaddps %ymm6,%ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009010 184,0,0,128,63, //mov $0x3f800000,%eax
9011 197,249,110,216, //vmovd %eax,%xmm3
9012 196,227,121,4,219,0, //vpermilps $0x0,%xmm3,%xmm3
9013 196,227,101,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
Mike Klein894d5612017-03-07 07:59:52 -05009014 72,173, //lods %ds:(%rsi),%rax
9015 255,224, //jmpq *%rax
9016 65,137,200, //mov %ecx,%r8d
9017 65,128,224,7, //and $0x7,%r8b
9018 196,65,57,239,192, //vpxor %xmm8,%xmm8,%xmm8
9019 65,254,200, //dec %r8b
9020 69,15,182,192, //movzbl %r8b,%r8d
9021 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009022 15,135,69,255,255,255, //ja 6b3 <_sk_lerp_565_avx+0x14>
9023 76,141,13,75,0,0,0, //lea 0x4b(%rip),%r9 # 7c0 <_sk_lerp_565_avx+0x121>
Mike Klein894d5612017-03-07 07:59:52 -05009024 75,99,4,129, //movslq (%r9,%r8,4),%rax
9025 76,1,200, //add %r9,%rax
9026 255,224, //jmpq *%rax
9027 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
9028 196,65,97,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm8
9029 196,65,57,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm8,%xmm8
9030 196,65,57,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm8,%xmm8
9031 196,65,57,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm8,%xmm8
9032 196,65,57,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
9033 196,65,57,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
9034 196,65,57,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009035 233,245,254,255,255, //jmpq 6b3 <_sk_lerp_565_avx+0x14>
9036 102,144, //xchg %ax,%ax
9037 242,255, //repnz (bad)
Mike Klein894d5612017-03-07 07:59:52 -05009038 255, //(bad)
9039 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009040 234, //(bad)
Mike Klein894d5612017-03-07 07:59:52 -05009041 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009042 255, //(bad)
9043 255,226, //jmpq *%rdx
Mike Klein894d5612017-03-07 07:59:52 -05009044 255, //(bad)
9045 255, //(bad)
9046 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009047 218,255, //(bad)
Mike Klein894d5612017-03-07 07:59:52 -05009048 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009049 255,210, //callq *%rdx
Mike Klein894d5612017-03-07 07:59:52 -05009050 255, //(bad)
9051 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009052 255,202, //dec %edx
Mike Klein894d5612017-03-07 07:59:52 -05009053 255, //(bad)
9054 255, //(bad)
9055 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009056 190, //.byte 0xbe
Mike Klein894d5612017-03-07 07:59:52 -05009057 255, //(bad)
9058 255, //(bad)
9059 255, //.byte 0xff
9060};
9061
9062CODE const uint8_t sk_load_tables_avx[] = {
9063 85, //push %rbp
9064 65,87, //push %r15
9065 65,86, //push %r14
9066 65,85, //push %r13
9067 65,84, //push %r12
9068 83, //push %rbx
9069 72,173, //lods %ds:(%rsi),%rax
9070 76,139,0, //mov (%rax),%r8
9071 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009072 15,133,18,2,0,0, //jne a06 <_sk_load_tables_avx+0x22a>
Mike Klein894d5612017-03-07 07:59:52 -05009073 196,65,124,16,4,184, //vmovups (%r8,%rdi,4),%ymm8
9074 196,98,125,24,74,16, //vbroadcastss 0x10(%rdx),%ymm9
9075 196,193,52,84,192, //vandps %ymm8,%ymm9,%ymm0
9076 196,193,249,126,193, //vmovq %xmm0,%r9
9077 69,137,203, //mov %r9d,%r11d
9078 196,195,249,22,194,1, //vpextrq $0x1,%xmm0,%r10
9079 69,137,214, //mov %r10d,%r14d
9080 73,193,234,32, //shr $0x20,%r10
9081 73,193,233,32, //shr $0x20,%r9
9082 196,227,125,25,192,1, //vextractf128 $0x1,%ymm0,%xmm0
9083 196,193,249,126,196, //vmovq %xmm0,%r12
9084 69,137,231, //mov %r12d,%r15d
9085 196,227,249,22,195,1, //vpextrq $0x1,%xmm0,%rbx
9086 65,137,221, //mov %ebx,%r13d
9087 72,193,235,32, //shr $0x20,%rbx
9088 73,193,236,32, //shr $0x20,%r12
9089 72,139,104,8, //mov 0x8(%rax),%rbp
9090 76,139,64,16, //mov 0x10(%rax),%r8
9091 196,161,122,16,68,189,0, //vmovss 0x0(%rbp,%r15,4),%xmm0
9092 196,163,121,33,68,165,0,16, //vinsertps $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
9093 196,163,121,33,68,173,0,32, //vinsertps $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
9094 197,250,16,76,157,0, //vmovss 0x0(%rbp,%rbx,4),%xmm1
9095 196,227,121,33,193,48, //vinsertps $0x30,%xmm1,%xmm0,%xmm0
9096 196,161,122,16,76,157,0, //vmovss 0x0(%rbp,%r11,4),%xmm1
9097 196,163,113,33,76,141,0,16, //vinsertps $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
9098 196,163,113,33,76,181,0,32, //vinsertps $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
9099 196,161,122,16,92,149,0, //vmovss 0x0(%rbp,%r10,4),%xmm3
9100 196,227,113,33,203,48, //vinsertps $0x30,%xmm3,%xmm1,%xmm1
9101 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
9102 196,193,113,114,208,8, //vpsrld $0x8,%xmm8,%xmm1
9103 196,67,125,25,194,1, //vextractf128 $0x1,%ymm8,%xmm10
9104 196,193,105,114,210,8, //vpsrld $0x8,%xmm10,%xmm2
9105 196,227,117,24,202,1, //vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
9106 197,180,84,201, //vandps %ymm1,%ymm9,%ymm1
9107 196,193,249,126,201, //vmovq %xmm1,%r9
9108 69,137,203, //mov %r9d,%r11d
9109 196,195,249,22,202,1, //vpextrq $0x1,%xmm1,%r10
9110 69,137,214, //mov %r10d,%r14d
9111 73,193,234,32, //shr $0x20,%r10
9112 73,193,233,32, //shr $0x20,%r9
9113 196,227,125,25,201,1, //vextractf128 $0x1,%ymm1,%xmm1
9114 196,225,249,126,205, //vmovq %xmm1,%rbp
9115 65,137,239, //mov %ebp,%r15d
9116 196,227,249,22,203,1, //vpextrq $0x1,%xmm1,%rbx
9117 65,137,220, //mov %ebx,%r12d
9118 72,193,235,32, //shr $0x20,%rbx
9119 72,193,237,32, //shr $0x20,%rbp
9120 196,129,122,16,12,184, //vmovss (%r8,%r15,4),%xmm1
9121 196,195,113,33,12,168,16, //vinsertps $0x10,(%r8,%rbp,4),%xmm1,%xmm1
9122 196,129,122,16,20,160, //vmovss (%r8,%r12,4),%xmm2
9123 196,227,113,33,202,32, //vinsertps $0x20,%xmm2,%xmm1,%xmm1
9124 196,193,122,16,20,152, //vmovss (%r8,%rbx,4),%xmm2
9125 196,227,113,33,202,48, //vinsertps $0x30,%xmm2,%xmm1,%xmm1
9126 196,129,122,16,20,152, //vmovss (%r8,%r11,4),%xmm2
9127 196,131,105,33,20,136,16, //vinsertps $0x10,(%r8,%r9,4),%xmm2,%xmm2
9128 196,129,122,16,28,176, //vmovss (%r8,%r14,4),%xmm3
9129 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
9130 196,129,122,16,28,144, //vmovss (%r8,%r10,4),%xmm3
9131 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
9132 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
9133 72,139,64,24, //mov 0x18(%rax),%rax
9134 196,193,105,114,208,16, //vpsrld $0x10,%xmm8,%xmm2
9135 196,193,97,114,210,16, //vpsrld $0x10,%xmm10,%xmm3
9136 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
9137 197,180,84,210, //vandps %ymm2,%ymm9,%ymm2
9138 196,193,249,126,208, //vmovq %xmm2,%r8
9139 69,137,194, //mov %r8d,%r10d
9140 196,195,249,22,209,1, //vpextrq $0x1,%xmm2,%r9
9141 69,137,203, //mov %r9d,%r11d
9142 73,193,233,32, //shr $0x20,%r9
9143 73,193,232,32, //shr $0x20,%r8
9144 196,227,125,25,210,1, //vextractf128 $0x1,%ymm2,%xmm2
9145 196,225,249,126,213, //vmovq %xmm2,%rbp
9146 65,137,238, //mov %ebp,%r14d
9147 196,227,249,22,211,1, //vpextrq $0x1,%xmm2,%rbx
9148 65,137,223, //mov %ebx,%r15d
9149 72,193,235,32, //shr $0x20,%rbx
9150 72,193,237,32, //shr $0x20,%rbp
9151 196,161,122,16,20,176, //vmovss (%rax,%r14,4),%xmm2
9152 196,227,105,33,20,168,16, //vinsertps $0x10,(%rax,%rbp,4),%xmm2,%xmm2
9153 196,161,122,16,28,184, //vmovss (%rax,%r15,4),%xmm3
9154 196,227,105,33,211,32, //vinsertps $0x20,%xmm3,%xmm2,%xmm2
9155 197,250,16,28,152, //vmovss (%rax,%rbx,4),%xmm3
9156 196,99,105,33,203,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm9
9157 196,161,122,16,28,144, //vmovss (%rax,%r10,4),%xmm3
9158 196,163,97,33,28,128,16, //vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
9159 196,161,122,16,20,152, //vmovss (%rax,%r11,4),%xmm2
9160 196,227,97,33,210,32, //vinsertps $0x20,%xmm2,%xmm3,%xmm2
9161 196,161,122,16,28,136, //vmovss (%rax,%r9,4),%xmm3
9162 196,227,105,33,211,48, //vinsertps $0x30,%xmm3,%xmm2,%xmm2
9163 196,195,109,24,209,1, //vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
9164 196,193,57,114,208,24, //vpsrld $0x18,%xmm8,%xmm8
9165 196,193,97,114,210,24, //vpsrld $0x18,%xmm10,%xmm3
9166 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
9167 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
9168 196,98,125,24,66,12, //vbroadcastss 0xc(%rdx),%ymm8
9169 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
9170 72,173, //lods %ds:(%rsi),%rax
9171 91, //pop %rbx
9172 65,92, //pop %r12
9173 65,93, //pop %r13
9174 65,94, //pop %r14
9175 65,95, //pop %r15
9176 93, //pop %rbp
9177 255,224, //jmpq *%rax
9178 65,137,201, //mov %ecx,%r9d
9179 65,128,225,7, //and $0x7,%r9b
9180 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9181 65,254,201, //dec %r9b
9182 69,15,182,201, //movzbl %r9b,%r9d
9183 65,128,249,6, //cmp $0x6,%r9b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009184 15,135,215,253,255,255, //ja 7fa <_sk_load_tables_avx+0x1e>
9185 76,141,21,138,0,0,0, //lea 0x8a(%rip),%r10 # ab4 <_sk_load_tables_avx+0x2d8>
Mike Klein894d5612017-03-07 07:59:52 -05009186 79,99,12,138, //movslq (%r10,%r9,4),%r9
9187 77,1,209, //add %r10,%r9
9188 65,255,225, //jmpq *%r9
9189 196,193,121,110,68,184,24, //vmovd 0x18(%r8,%rdi,4),%xmm0
9190 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
9191 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
9192 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9193 196,99,117,12,192,64, //vblendps $0x40,%ymm0,%ymm1,%ymm8
9194 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
9195 196,195,121,34,68,184,20,1, //vpinsrd $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
9196 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
9197 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
9198 196,195,121,34,68,184,16,0, //vpinsrd $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
9199 196,99,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
9200 196,195,57,34,68,184,12,3, //vpinsrd $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
9201 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
9202 196,195,57,34,68,184,8,2, //vpinsrd $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
9203 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
9204 196,195,57,34,68,184,4,1, //vpinsrd $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
9205 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
9206 196,195,57,34,4,184,0, //vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
9207 196,99,61,12,192,15, //vblendps $0xf,%ymm0,%ymm8,%ymm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009208 233,70,253,255,255, //jmpq 7fa <_sk_load_tables_avx+0x1e>
Mike Klein894d5612017-03-07 07:59:52 -05009209 238, //out %al,(%dx)
9210 255, //(bad)
9211 255, //(bad)
9212 255,224, //jmpq *%rax
9213 255, //(bad)
9214 255, //(bad)
9215 255,210, //callq *%rdx
9216 255, //(bad)
9217 255, //(bad)
9218 255,196, //inc %esp
9219 255, //(bad)
9220 255, //(bad)
9221 255,176,255,255,255,156, //pushq -0x63000001(%rax)
9222 255, //(bad)
9223 255, //(bad)
9224 255, //.byte 0xff
9225 128,255,255, //cmp $0xff,%bh
9226 255, //.byte 0xff
9227};
9228
9229CODE const uint8_t sk_load_a8_avx[] = {
9230 73,137,200, //mov %rcx,%r8
9231 72,173, //lods %ds:(%rsi),%rax
9232 72,139,0, //mov (%rax),%rax
9233 72,1,248, //add %rdi,%rax
9234 77,133,192, //test %r8,%r8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009235 117,59, //jne b1b <_sk_load_a8_avx+0x4b>
Mike Klein894d5612017-03-07 07:59:52 -05009236 197,251,16,0, //vmovsd (%rax),%xmm0
9237 196,226,121,49,200, //vpmovzxbd %xmm0,%xmm1
9238 196,227,121,4,192,229, //vpermilps $0xe5,%xmm0,%xmm0
9239 196,226,121,49,192, //vpmovzxbd %xmm0,%xmm0
9240 196,227,117,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
9241 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
9242 196,226,125,24,74,12, //vbroadcastss 0xc(%rdx),%ymm1
9243 197,252,89,217, //vmulps %ymm1,%ymm0,%ymm3
9244 72,173, //lods %ds:(%rsi),%rax
9245 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
9246 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9247 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
9248 76,137,193, //mov %r8,%rcx
9249 255,224, //jmpq *%rax
9250 49,201, //xor %ecx,%ecx
9251 77,137,194, //mov %r8,%r10
9252 69,49,201, //xor %r9d,%r9d
9253 68,15,182,24, //movzbl (%rax),%r11d
9254 72,255,192, //inc %rax
9255 73,211,227, //shl %cl,%r11
9256 77,9,217, //or %r11,%r9
9257 72,131,193,8, //add $0x8,%rcx
9258 73,255,202, //dec %r10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009259 117,234, //jne b23 <_sk_load_a8_avx+0x53>
Mike Klein894d5612017-03-07 07:59:52 -05009260 196,193,249,110,193, //vmovq %r9,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009261 235,164, //jmp ae4 <_sk_load_a8_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05009262};
9263
9264CODE const uint8_t sk_store_a8_avx[] = {
9265 72,173, //lods %ds:(%rsi),%rax
9266 76,139,8, //mov (%rax),%r9
9267 196,98,125,24,66,8, //vbroadcastss 0x8(%rdx),%ymm8
9268 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
9269 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
9270 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9271 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
9272 196,65,57,103,192, //vpackuswb %xmm8,%xmm8,%xmm8
9273 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009274 117,10, //jne b73 <_sk_store_a8_avx+0x33>
Mike Klein894d5612017-03-07 07:59:52 -05009275 196,65,123,17,4,57, //vmovsd %xmm8,(%r9,%rdi,1)
9276 72,173, //lods %ds:(%rsi),%rax
9277 255,224, //jmpq *%rax
9278 137,200, //mov %ecx,%eax
9279 36,7, //and $0x7,%al
9280 254,200, //dec %al
9281 68,15,182,192, //movzbl %al,%r8d
9282 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009283 119,236, //ja b6f <_sk_store_a8_avx+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05009284 196,66,121,48,192, //vpmovzxbw %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009285 76,141,21,69,0,0,0, //lea 0x45(%rip),%r10 # bd4 <_sk_store_a8_avx+0x94>
Mike Klein894d5612017-03-07 07:59:52 -05009286 75,99,4,130, //movslq (%r10,%r8,4),%rax
9287 76,1,208, //add %r10,%rax
9288 255,224, //jmpq *%rax
9289 196,67,121,20,68,57,6,12, //vpextrb $0xc,%xmm8,0x6(%r9,%rdi,1)
9290 196,67,121,20,68,57,5,10, //vpextrb $0xa,%xmm8,0x5(%r9,%rdi,1)
9291 196,67,121,20,68,57,4,8, //vpextrb $0x8,%xmm8,0x4(%r9,%rdi,1)
9292 196,67,121,20,68,57,3,6, //vpextrb $0x6,%xmm8,0x3(%r9,%rdi,1)
9293 196,67,121,20,68,57,2,4, //vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
9294 196,67,121,20,68,57,1,2, //vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
9295 196,67,121,20,4,57,0, //vpextrb $0x0,%xmm8,(%r9,%rdi,1)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009296 235,158, //jmp b6f <_sk_store_a8_avx+0x2f>
Mike Klein894d5612017-03-07 07:59:52 -05009297 15,31,0, //nopl (%rax)
9298 244, //hlt
9299 255, //(bad)
9300 255, //(bad)
9301 255, //(bad)
9302 236, //in (%dx),%al
9303 255, //(bad)
9304 255, //(bad)
9305 255,228, //jmpq *%rsp
9306 255, //(bad)
9307 255, //(bad)
9308 255, //(bad)
9309 220,255, //fdivr %st,%st(7)
9310 255, //(bad)
9311 255,212, //callq *%rsp
9312 255, //(bad)
9313 255, //(bad)
9314 255,204, //dec %esp
9315 255, //(bad)
9316 255, //(bad)
9317 255,196, //inc %esp
9318 255, //(bad)
9319 255, //(bad)
9320 255, //.byte 0xff
9321};
9322
9323CODE const uint8_t sk_load_565_avx[] = {
9324 72,173, //lods %ds:(%rsi),%rax
9325 76,139,16, //mov (%rax),%r10
9326 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009327 117,106, //jne c64 <_sk_load_565_avx+0x74>
Mike Klein894d5612017-03-07 07:59:52 -05009328 196,193,122,111,4,122, //vmovdqu (%r10,%rdi,2),%xmm0
9329 197,241,239,201, //vpxor %xmm1,%xmm1,%xmm1
9330 197,249,105,201, //vpunpckhwd %xmm1,%xmm0,%xmm1
9331 196,226,121,51,192, //vpmovzxwd %xmm0,%xmm0
9332 196,227,125,24,209,1, //vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
9333 196,226,125,24,66,104, //vbroadcastss 0x68(%rdx),%ymm0
9334 197,252,84,194, //vandps %ymm2,%ymm0,%ymm0
9335 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
9336 196,226,125,24,74,116, //vbroadcastss 0x74(%rdx),%ymm1
9337 197,244,89,192, //vmulps %ymm0,%ymm1,%ymm0
9338 196,226,125,24,74,108, //vbroadcastss 0x6c(%rdx),%ymm1
9339 197,244,84,202, //vandps %ymm2,%ymm1,%ymm1
9340 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
9341 196,226,125,24,90,120, //vbroadcastss 0x78(%rdx),%ymm3
9342 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
9343 196,226,125,24,90,112, //vbroadcastss 0x70(%rdx),%ymm3
9344 197,228,84,210, //vandps %ymm2,%ymm3,%ymm2
9345 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
9346 196,226,125,24,90,124, //vbroadcastss 0x7c(%rdx),%ymm3
9347 197,228,89,210, //vmulps %ymm2,%ymm3,%ymm2
9348 196,226,125,24,26, //vbroadcastss (%rdx),%ymm3
9349 72,173, //lods %ds:(%rsi),%rax
9350 255,224, //jmpq *%rax
9351 65,137,200, //mov %ecx,%r8d
9352 65,128,224,7, //and $0x7,%r8b
9353 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
9354 65,254,200, //dec %r8b
9355 69,15,182,192, //movzbl %r8b,%r8d
9356 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009357 119,132, //ja c00 <_sk_load_565_avx+0x10>
9358 76,141,13,73,0,0,0, //lea 0x49(%rip),%r9 # ccc <_sk_load_565_avx+0xdc>
Mike Klein894d5612017-03-07 07:59:52 -05009359 75,99,4,129, //movslq (%r9,%r8,4),%rax
9360 76,1,200, //add %r9,%rax
9361 255,224, //jmpq *%rax
9362 197,249,239,192, //vpxor %xmm0,%xmm0,%xmm0
9363 196,193,121,196,68,122,12,6, //vpinsrw $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
9364 196,193,121,196,68,122,10,5, //vpinsrw $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
9365 196,193,121,196,68,122,8,4, //vpinsrw $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
9366 196,193,121,196,68,122,6,3, //vpinsrw $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
9367 196,193,121,196,68,122,4,2, //vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
9368 196,193,121,196,68,122,2,1, //vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
9369 196,193,121,196,4,122,0, //vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009370 233,52,255,255,255, //jmpq c00 <_sk_load_565_avx+0x10>
Mike Klein894d5612017-03-07 07:59:52 -05009371 244, //hlt
9372 255, //(bad)
9373 255, //(bad)
9374 255, //(bad)
9375 236, //in (%dx),%al
9376 255, //(bad)
9377 255, //(bad)
9378 255,228, //jmpq *%rsp
9379 255, //(bad)
9380 255, //(bad)
9381 255, //(bad)
9382 220,255, //fdivr %st,%st(7)
9383 255, //(bad)
9384 255,212, //callq *%rsp
9385 255, //(bad)
9386 255, //(bad)
9387 255,204, //dec %esp
9388 255, //(bad)
9389 255, //(bad)
9390 255,192, //inc %eax
9391 255, //(bad)
9392 255, //(bad)
9393 255, //.byte 0xff
9394};
9395
9396CODE const uint8_t sk_store_565_avx[] = {
9397 72,173, //lods %ds:(%rsi),%rax
9398 76,139,8, //mov (%rax),%r9
9399 196,98,125,24,130,128,0,0,0, //vbroadcastss 0x80(%rdx),%ymm8
9400 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
9401 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
9402 196,193,41,114,241,11, //vpslld $0xb,%xmm9,%xmm10
9403 196,67,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm9
9404 196,193,49,114,241,11, //vpslld $0xb,%xmm9,%xmm9
9405 196,67,45,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
9406 196,98,125,24,146,132,0,0,0, //vbroadcastss 0x84(%rdx),%ymm10
9407 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
9408 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
9409 196,193,33,114,242,5, //vpslld $0x5,%xmm10,%xmm11
9410 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
9411 196,193,41,114,242,5, //vpslld $0x5,%xmm10,%xmm10
9412 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
9413 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
9414 197,60,89,194, //vmulps %ymm2,%ymm8,%ymm8
9415 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
9416 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
9417 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9418 196,66,57,43,193, //vpackusdw %xmm9,%xmm8,%xmm8
9419 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009420 117,10, //jne d6e <_sk_store_565_avx+0x86>
Mike Klein894d5612017-03-07 07:59:52 -05009421 196,65,122,127,4,121, //vmovdqu %xmm8,(%r9,%rdi,2)
9422 72,173, //lods %ds:(%rsi),%rax
9423 255,224, //jmpq *%rax
9424 137,200, //mov %ecx,%eax
9425 36,7, //and $0x7,%al
9426 254,200, //dec %al
9427 68,15,182,192, //movzbl %al,%r8d
9428 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009429 119,236, //ja d6a <_sk_store_565_avx+0x82>
9430 76,141,21,71,0,0,0, //lea 0x47(%rip),%r10 # dcc <_sk_store_565_avx+0xe4>
Mike Klein894d5612017-03-07 07:59:52 -05009431 75,99,4,130, //movslq (%r10,%r8,4),%rax
9432 76,1,208, //add %r10,%rax
9433 255,224, //jmpq *%rax
9434 196,67,121,21,68,121,12,6, //vpextrw $0x6,%xmm8,0xc(%r9,%rdi,2)
9435 196,67,121,21,68,121,10,5, //vpextrw $0x5,%xmm8,0xa(%r9,%rdi,2)
9436 196,67,121,21,68,121,8,4, //vpextrw $0x4,%xmm8,0x8(%r9,%rdi,2)
9437 196,67,121,21,68,121,6,3, //vpextrw $0x3,%xmm8,0x6(%r9,%rdi,2)
9438 196,67,121,21,68,121,4,2, //vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
9439 196,67,121,21,68,121,2,1, //vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
9440 197,121,126,192, //vmovd %xmm8,%eax
9441 102,65,137,4,121, //mov %ax,(%r9,%rdi,2)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009442 235,161, //jmp d6a <_sk_store_565_avx+0x82>
Mike Klein894d5612017-03-07 07:59:52 -05009443 15,31,0, //nopl (%rax)
9444 242,255, //repnz (bad)
9445 255, //(bad)
9446 255, //(bad)
9447 234, //(bad)
9448 255, //(bad)
9449 255, //(bad)
9450 255,226, //jmpq *%rdx
9451 255, //(bad)
9452 255, //(bad)
9453 255, //(bad)
9454 218,255, //(bad)
9455 255, //(bad)
9456 255,210, //callq *%rdx
9457 255, //(bad)
9458 255, //(bad)
9459 255,202, //dec %edx
9460 255, //(bad)
9461 255, //(bad)
9462 255,194, //inc %edx
9463 255, //(bad)
9464 255, //(bad)
9465 255, //.byte 0xff
9466};
9467
9468CODE const uint8_t sk_load_8888_avx[] = {
9469 72,173, //lods %ds:(%rsi),%rax
9470 76,139,16, //mov (%rax),%r10
9471 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009472 15,133,157,0,0,0, //jne e93 <_sk_load_8888_avx+0xab>
Mike Klein894d5612017-03-07 07:59:52 -05009473 196,65,124,16,12,186, //vmovups (%r10,%rdi,4),%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009474 184,255,0,0,0, //mov $0xff,%eax
9475 197,249,110,192, //vmovd %eax,%xmm0
9476 197,249,112,192,0, //vpshufd $0x0,%xmm0,%xmm0
9477 196,99,125,24,216,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm11
Mike Klein894d5612017-03-07 07:59:52 -05009478 196,193,36,84,193, //vandps %ymm9,%ymm11,%ymm0
9479 197,252,91,192, //vcvtdq2ps %ymm0,%ymm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009480 184,129,128,128,59, //mov $0x3b808081,%eax
9481 197,249,110,200, //vmovd %eax,%xmm1
9482 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
9483 196,99,117,24,193,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm8
9484 196,193,124,89,192, //vmulps %ymm8,%ymm0,%ymm0
Mike Klein894d5612017-03-07 07:59:52 -05009485 196,193,41,114,209,8, //vpsrld $0x8,%xmm9,%xmm10
9486 196,99,125,25,203,1, //vextractf128 $0x1,%ymm9,%xmm3
9487 197,241,114,211,8, //vpsrld $0x8,%xmm3,%xmm1
9488 196,227,45,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm10,%ymm1
9489 197,164,84,201, //vandps %ymm1,%ymm11,%ymm1
9490 197,252,91,201, //vcvtdq2ps %ymm1,%ymm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009491 196,193,116,89,200, //vmulps %ymm8,%ymm1,%ymm1
Mike Klein894d5612017-03-07 07:59:52 -05009492 196,193,41,114,209,16, //vpsrld $0x10,%xmm9,%xmm10
9493 197,233,114,211,16, //vpsrld $0x10,%xmm3,%xmm2
9494 196,227,45,24,210,1, //vinsertf128 $0x1,%xmm2,%ymm10,%ymm2
9495 197,164,84,210, //vandps %ymm2,%ymm11,%ymm2
9496 197,252,91,210, //vcvtdq2ps %ymm2,%ymm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009497 196,193,108,89,208, //vmulps %ymm8,%ymm2,%ymm2
Mike Klein894d5612017-03-07 07:59:52 -05009498 196,193,49,114,209,24, //vpsrld $0x18,%xmm9,%xmm9
9499 197,225,114,211,24, //vpsrld $0x18,%xmm3,%xmm3
9500 196,227,53,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
9501 197,252,91,219, //vcvtdq2ps %ymm3,%ymm3
9502 196,193,100,89,216, //vmulps %ymm8,%ymm3,%ymm3
9503 72,173, //lods %ds:(%rsi),%rax
9504 255,224, //jmpq *%rax
9505 65,137,200, //mov %ecx,%r8d
9506 65,128,224,7, //and $0x7,%r8b
9507 196,65,52,87,201, //vxorps %ymm9,%ymm9,%ymm9
9508 65,254,200, //dec %r8b
9509 69,15,182,192, //movzbl %r8b,%r8d
9510 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009511 15,135,76,255,255,255, //ja dfc <_sk_load_8888_avx+0x14>
9512 76,141,13,137,0,0,0, //lea 0x89(%rip),%r9 # f40 <_sk_load_8888_avx+0x158>
Mike Klein894d5612017-03-07 07:59:52 -05009513 75,99,4,129, //movslq (%r9,%r8,4),%rax
9514 76,1,200, //add %r9,%rax
9515 255,224, //jmpq *%rax
9516 196,193,121,110,68,186,24, //vmovd 0x18(%r10,%rdi,4),%xmm0
9517 197,249,112,192,68, //vpshufd $0x44,%xmm0,%xmm0
9518 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
9519 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9520 196,99,117,12,200,64, //vblendps $0x40,%ymm0,%ymm1,%ymm9
9521 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
9522 196,195,121,34,68,186,20,1, //vpinsrd $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
9523 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
9524 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
9525 196,195,121,34,68,186,16,0, //vpinsrd $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
9526 196,99,53,24,200,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
9527 196,195,49,34,68,186,12,3, //vpinsrd $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
9528 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
9529 196,195,49,34,68,186,8,2, //vpinsrd $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
9530 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
9531 196,195,49,34,68,186,4,1, //vpinsrd $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
9532 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
9533 196,195,49,34,4,186,0, //vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
9534 196,99,53,12,200,15, //vblendps $0xf,%ymm0,%ymm9,%ymm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009535 233,188,254,255,255, //jmpq dfc <_sk_load_8888_avx+0x14>
Mike Klein894d5612017-03-07 07:59:52 -05009536 238, //out %al,(%dx)
9537 255, //(bad)
9538 255, //(bad)
9539 255,224, //jmpq *%rax
9540 255, //(bad)
9541 255, //(bad)
9542 255,210, //callq *%rdx
9543 255, //(bad)
9544 255, //(bad)
9545 255,196, //inc %esp
9546 255, //(bad)
9547 255, //(bad)
9548 255,176,255,255,255,156, //pushq -0x63000001(%rax)
9549 255, //(bad)
9550 255, //(bad)
9551 255, //.byte 0xff
9552 128,255,255, //cmp $0xff,%bh
9553 255, //.byte 0xff
9554};
9555
9556CODE const uint8_t sk_store_8888_avx[] = {
9557 72,173, //lods %ds:(%rsi),%rax
9558 76,139,8, //mov (%rax),%r9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009559 184,0,0,127,67, //mov $0x437f0000,%eax
9560 197,121,110,192, //vmovd %eax,%xmm8
9561 196,67,121,4,192,0, //vpermilps $0x0,%xmm8,%xmm8
9562 196,67,61,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
Mike Klein894d5612017-03-07 07:59:52 -05009563 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
9564 196,65,125,91,201, //vcvtps2dq %ymm9,%ymm9
9565 197,60,89,209, //vmulps %ymm1,%ymm8,%ymm10
9566 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
9567 196,193,33,114,242,8, //vpslld $0x8,%xmm10,%xmm11
9568 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
9569 196,193,41,114,242,8, //vpslld $0x8,%xmm10,%xmm10
9570 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
9571 196,65,45,86,201, //vorpd %ymm9,%ymm10,%ymm9
9572 197,60,89,210, //vmulps %ymm2,%ymm8,%ymm10
9573 196,65,125,91,210, //vcvtps2dq %ymm10,%ymm10
9574 196,193,33,114,242,16, //vpslld $0x10,%xmm10,%xmm11
9575 196,67,125,25,210,1, //vextractf128 $0x1,%ymm10,%xmm10
9576 196,193,41,114,242,16, //vpslld $0x10,%xmm10,%xmm10
9577 196,67,37,24,210,1, //vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
9578 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
9579 196,65,125,91,192, //vcvtps2dq %ymm8,%ymm8
9580 196,193,33,114,240,24, //vpslld $0x18,%xmm8,%xmm11
9581 196,67,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm8
9582 196,193,57,114,240,24, //vpslld $0x18,%xmm8,%xmm8
9583 196,67,37,24,192,1, //vinsertf128 $0x1,%xmm8,%ymm11,%ymm8
9584 196,65,45,86,192, //vorpd %ymm8,%ymm10,%ymm8
9585 196,65,53,86,192, //vorpd %ymm8,%ymm9,%ymm8
9586 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009587 117,10, //jne 1000 <_sk_store_8888_avx+0xa4>
Mike Klein894d5612017-03-07 07:59:52 -05009588 196,65,124,17,4,185, //vmovups %ymm8,(%r9,%rdi,4)
9589 72,173, //lods %ds:(%rsi),%rax
9590 255,224, //jmpq *%rax
9591 137,200, //mov %ecx,%eax
9592 36,7, //and $0x7,%al
9593 254,200, //dec %al
9594 68,15,182,192, //movzbl %al,%r8d
9595 65,128,248,6, //cmp $0x6,%r8b
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009596 119,236, //ja ffc <_sk_store_8888_avx+0xa0>
9597 76,141,21,85,0,0,0, //lea 0x55(%rip),%r10 # 106c <_sk_store_8888_avx+0x110>
Mike Klein894d5612017-03-07 07:59:52 -05009598 75,99,4,130, //movslq (%r10,%r8,4),%rax
9599 76,1,208, //add %r10,%rax
9600 255,224, //jmpq *%rax
9601 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9602 196,67,121,22,76,185,24,2, //vpextrd $0x2,%xmm9,0x18(%r9,%rdi,4)
9603 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9604 196,67,121,22,76,185,20,1, //vpextrd $0x1,%xmm9,0x14(%r9,%rdi,4)
9605 196,67,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm9
9606 196,65,121,126,76,185,16, //vmovd %xmm9,0x10(%r9,%rdi,4)
9607 196,67,121,22,68,185,12,3, //vpextrd $0x3,%xmm8,0xc(%r9,%rdi,4)
9608 196,67,121,22,68,185,8,2, //vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
9609 196,67,121,22,68,185,4,1, //vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
9610 196,65,121,126,4,185, //vmovd %xmm8,(%r9,%rdi,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009611 235,147, //jmp ffc <_sk_store_8888_avx+0xa0>
9612 15,31,0, //nopl (%rax)
9613 245, //cmc
Mike Klein894d5612017-03-07 07:59:52 -05009614 255, //(bad)
9615 255, //(bad)
9616 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009617 237, //in (%dx),%eax
Mike Klein894d5612017-03-07 07:59:52 -05009618 255, //(bad)
9619 255, //(bad)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009620 255,229, //jmpq *%rbp
9621 255, //(bad)
9622 255, //(bad)
9623 255, //(bad)
9624 221,255, //(bad)
9625 255, //(bad)
9626 255,208, //callq *%rax
9627 255, //(bad)
9628 255, //(bad)
9629 255,194, //inc %edx
Mike Klein894d5612017-03-07 07:59:52 -05009630 255, //(bad)
9631 255, //(bad)
9632 255, //.byte 0xff
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009633 180,255, //mov $0xff,%ah
Mike Klein894d5612017-03-07 07:59:52 -05009634 255, //(bad)
9635 255, //.byte 0xff
9636};
9637
9638CODE const uint8_t sk_load_f16_avx[] = {
9639 72,173, //lods %ds:(%rsi),%rax
9640 72,139,0, //mov (%rax),%rax
9641 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009642 15,133,240,0,0,0, //jne 1186 <_sk_load_f16_avx+0xfe>
Mike Klein894d5612017-03-07 07:59:52 -05009643 197,249,16,12,248, //vmovupd (%rax,%rdi,8),%xmm1
9644 197,249,16,84,248,16, //vmovupd 0x10(%rax,%rdi,8),%xmm2
9645 197,249,16,92,248,32, //vmovupd 0x20(%rax,%rdi,8),%xmm3
9646 197,121,16,68,248,48, //vmovupd 0x30(%rax,%rdi,8),%xmm8
9647 197,241,97,194, //vpunpcklwd %xmm2,%xmm1,%xmm0
9648 197,241,105,202, //vpunpckhwd %xmm2,%xmm1,%xmm1
9649 196,193,97,97,208, //vpunpcklwd %xmm8,%xmm3,%xmm2
9650 196,193,97,105,216, //vpunpckhwd %xmm8,%xmm3,%xmm3
9651 197,121,97,193, //vpunpcklwd %xmm1,%xmm0,%xmm8
9652 197,249,105,193, //vpunpckhwd %xmm1,%xmm0,%xmm0
9653 197,233,97,203, //vpunpcklwd %xmm3,%xmm2,%xmm1
9654 197,105,105,203, //vpunpckhwd %xmm3,%xmm2,%xmm9
9655 197,249,110,90,100, //vmovd 0x64(%rdx),%xmm3
9656 197,249,112,219,0, //vpshufd $0x0,%xmm3,%xmm3
9657 196,193,97,101,208, //vpcmpgtw %xmm8,%xmm3,%xmm2
9658 196,65,105,223,192, //vpandn %xmm8,%xmm2,%xmm8
9659 197,225,101,208, //vpcmpgtw %xmm0,%xmm3,%xmm2
9660 197,233,223,192, //vpandn %xmm0,%xmm2,%xmm0
9661 197,225,101,209, //vpcmpgtw %xmm1,%xmm3,%xmm2
9662 197,233,223,201, //vpandn %xmm1,%xmm2,%xmm1
9663 196,193,97,101,209, //vpcmpgtw %xmm9,%xmm3,%xmm2
9664 196,193,105,223,209, //vpandn %xmm9,%xmm2,%xmm2
9665 196,66,121,51,208, //vpmovzxwd %xmm8,%xmm10
9666 196,98,121,51,201, //vpmovzxwd %xmm1,%xmm9
9667 197,225,239,219, //vpxor %xmm3,%xmm3,%xmm3
9668 197,57,105,195, //vpunpckhwd %xmm3,%xmm8,%xmm8
9669 197,241,105,203, //vpunpckhwd %xmm3,%xmm1,%xmm1
9670 196,98,121,51,216, //vpmovzxwd %xmm0,%xmm11
9671 196,98,121,51,226, //vpmovzxwd %xmm2,%xmm12
9672 197,121,105,235, //vpunpckhwd %xmm3,%xmm0,%xmm13
9673 197,105,105,243, //vpunpckhwd %xmm3,%xmm2,%xmm14
9674 196,193,121,114,242,13, //vpslld $0xd,%xmm10,%xmm0
9675 196,193,105,114,241,13, //vpslld $0xd,%xmm9,%xmm2
9676 196,227,125,24,194,1, //vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
9677 196,98,125,24,74,92, //vbroadcastss 0x5c(%rdx),%ymm9
9678 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
9679 196,193,105,114,240,13, //vpslld $0xd,%xmm8,%xmm2
9680 197,241,114,241,13, //vpslld $0xd,%xmm1,%xmm1
9681 196,227,109,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
9682 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
9683 196,193,105,114,243,13, //vpslld $0xd,%xmm11,%xmm2
9684 196,193,97,114,244,13, //vpslld $0xd,%xmm12,%xmm3
9685 196,227,109,24,211,1, //vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
9686 197,180,89,210, //vmulps %ymm2,%ymm9,%ymm2
9687 196,193,57,114,245,13, //vpslld $0xd,%xmm13,%xmm8
9688 196,193,97,114,246,13, //vpslld $0xd,%xmm14,%xmm3
9689 196,227,61,24,219,1, //vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
9690 197,180,89,219, //vmulps %ymm3,%ymm9,%ymm3
9691 72,173, //lods %ds:(%rsi),%rax
9692 255,224, //jmpq *%rax
9693 197,251,16,12,248, //vmovsd (%rax,%rdi,8),%xmm1
9694 196,65,57,87,192, //vxorpd %xmm8,%xmm8,%xmm8
9695 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009696 117,6, //jne 119c <_sk_load_f16_avx+0x114>
Mike Klein894d5612017-03-07 07:59:52 -05009697 197,250,126,201, //vmovq %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009698 235,30, //jmp 11ba <_sk_load_f16_avx+0x132>
Mike Klein894d5612017-03-07 07:59:52 -05009699 197,241,22,76,248,8, //vmovhpd 0x8(%rax,%rdi,8),%xmm1,%xmm1
9700 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009701 114,18, //jb 11ba <_sk_load_f16_avx+0x132>
Mike Klein894d5612017-03-07 07:59:52 -05009702 197,251,16,84,248,16, //vmovsd 0x10(%rax,%rdi,8),%xmm2
9703 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009704 117,19, //jne 11c7 <_sk_load_f16_avx+0x13f>
Mike Klein894d5612017-03-07 07:59:52 -05009705 197,250,126,210, //vmovq %xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009706 235,46, //jmp 11e8 <_sk_load_f16_avx+0x160>
Mike Klein894d5612017-03-07 07:59:52 -05009707 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
9708 197,233,87,210, //vxorpd %xmm2,%xmm2,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009709 233,230,254,255,255, //jmpq 10ad <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05009710 197,233,22,84,248,24, //vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
9711 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009712 114,21, //jb 11e8 <_sk_load_f16_avx+0x160>
Mike Klein894d5612017-03-07 07:59:52 -05009713 197,251,16,92,248,32, //vmovsd 0x20(%rax,%rdi,8),%xmm3
9714 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009715 117,18, //jne 11f1 <_sk_load_f16_avx+0x169>
Mike Klein894d5612017-03-07 07:59:52 -05009716 197,250,126,219, //vmovq %xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009717 233,197,254,255,255, //jmpq 10ad <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05009718 197,225,87,219, //vxorpd %xmm3,%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009719 233,188,254,255,255, //jmpq 10ad <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05009720 197,225,22,92,248,40, //vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
9721 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009722 15,130,172,254,255,255, //jb 10ad <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05009723 197,123,16,68,248,48, //vmovsd 0x30(%rax,%rdi,8),%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009724 233,161,254,255,255, //jmpq 10ad <_sk_load_f16_avx+0x25>
Mike Klein894d5612017-03-07 07:59:52 -05009725};
9726
9727CODE const uint8_t sk_store_f16_avx[] = {
9728 72,173, //lods %ds:(%rsi),%rax
9729 72,139,0, //mov (%rax),%rax
9730 196,98,125,24,66,96, //vbroadcastss 0x60(%rdx),%ymm8
9731 197,60,89,200, //vmulps %ymm0,%ymm8,%ymm9
9732 196,67,125,25,202,1, //vextractf128 $0x1,%ymm9,%xmm10
9733 196,193,41,114,210,13, //vpsrld $0xd,%xmm10,%xmm10
9734 196,193,49,114,209,13, //vpsrld $0xd,%xmm9,%xmm9
9735 197,60,89,217, //vmulps %ymm1,%ymm8,%ymm11
9736 196,67,125,25,220,1, //vextractf128 $0x1,%ymm11,%xmm12
9737 196,193,25,114,212,13, //vpsrld $0xd,%xmm12,%xmm12
9738 196,193,33,114,211,13, //vpsrld $0xd,%xmm11,%xmm11
9739 197,60,89,234, //vmulps %ymm2,%ymm8,%ymm13
9740 196,67,125,25,238,1, //vextractf128 $0x1,%ymm13,%xmm14
9741 196,193,9,114,214,13, //vpsrld $0xd,%xmm14,%xmm14
9742 196,193,17,114,213,13, //vpsrld $0xd,%xmm13,%xmm13
9743 197,60,89,195, //vmulps %ymm3,%ymm8,%ymm8
9744 196,67,125,25,199,1, //vextractf128 $0x1,%ymm8,%xmm15
9745 196,193,1,114,215,13, //vpsrld $0xd,%xmm15,%xmm15
9746 196,193,57,114,208,13, //vpsrld $0xd,%xmm8,%xmm8
9747 196,193,33,115,251,2, //vpslldq $0x2,%xmm11,%xmm11
9748 196,65,33,235,201, //vpor %xmm9,%xmm11,%xmm9
9749 196,193,33,115,252,2, //vpslldq $0x2,%xmm12,%xmm11
9750 196,65,33,235,226, //vpor %xmm10,%xmm11,%xmm12
9751 196,193,57,115,248,2, //vpslldq $0x2,%xmm8,%xmm8
9752 196,65,57,235,197, //vpor %xmm13,%xmm8,%xmm8
9753 196,193,41,115,255,2, //vpslldq $0x2,%xmm15,%xmm10
9754 196,65,41,235,238, //vpor %xmm14,%xmm10,%xmm13
9755 196,65,49,98,216, //vpunpckldq %xmm8,%xmm9,%xmm11
9756 196,65,49,106,208, //vpunpckhdq %xmm8,%xmm9,%xmm10
9757 196,65,25,98,205, //vpunpckldq %xmm13,%xmm12,%xmm9
9758 196,65,25,106,197, //vpunpckhdq %xmm13,%xmm12,%xmm8
9759 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009760 117,27, //jne 12cf <_sk_store_f16_avx+0xc3>
Mike Klein894d5612017-03-07 07:59:52 -05009761 197,120,17,28,248, //vmovups %xmm11,(%rax,%rdi,8)
9762 197,120,17,84,248,16, //vmovups %xmm10,0x10(%rax,%rdi,8)
9763 197,120,17,76,248,32, //vmovups %xmm9,0x20(%rax,%rdi,8)
9764 197,122,127,68,248,48, //vmovdqu %xmm8,0x30(%rax,%rdi,8)
9765 72,173, //lods %ds:(%rsi),%rax
9766 255,224, //jmpq *%rax
9767 197,121,214,28,248, //vmovq %xmm11,(%rax,%rdi,8)
9768 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009769 116,241, //je 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009770 197,121,23,92,248,8, //vmovhpd %xmm11,0x8(%rax,%rdi,8)
9771 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009772 114,229, //jb 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009773 197,121,214,84,248,16, //vmovq %xmm10,0x10(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009774 116,221, //je 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009775 197,121,23,84,248,24, //vmovhpd %xmm10,0x18(%rax,%rdi,8)
9776 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009777 114,209, //jb 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009778 197,121,214,76,248,32, //vmovq %xmm9,0x20(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009779 116,201, //je 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009780 197,121,23,76,248,40, //vmovhpd %xmm9,0x28(%rax,%rdi,8)
9781 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009782 114,189, //jb 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009783 197,121,214,68,248,48, //vmovq %xmm8,0x30(%rax,%rdi,8)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009784 235,181, //jmp 12cb <_sk_store_f16_avx+0xbf>
Mike Klein894d5612017-03-07 07:59:52 -05009785};
9786
9787CODE const uint8_t sk_store_f32_avx[] = {
9788 72,173, //lods %ds:(%rsi),%rax
9789 76,139,0, //mov (%rax),%r8
9790 72,141,4,189,0,0,0,0, //lea 0x0(,%rdi,4),%rax
9791 197,124,20,193, //vunpcklps %ymm1,%ymm0,%ymm8
9792 197,124,21,217, //vunpckhps %ymm1,%ymm0,%ymm11
9793 197,108,20,203, //vunpcklps %ymm3,%ymm2,%ymm9
9794 197,108,21,227, //vunpckhps %ymm3,%ymm2,%ymm12
9795 196,65,61,20,209, //vunpcklpd %ymm9,%ymm8,%ymm10
9796 196,65,61,21,201, //vunpckhpd %ymm9,%ymm8,%ymm9
9797 196,65,37,20,196, //vunpcklpd %ymm12,%ymm11,%ymm8
9798 196,65,37,21,220, //vunpckhpd %ymm12,%ymm11,%ymm11
9799 72,133,201, //test %rcx,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009800 117,55, //jne 1383 <_sk_store_f32_avx+0x6d>
Mike Klein894d5612017-03-07 07:59:52 -05009801 196,67,45,24,225,1, //vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
9802 196,67,61,24,235,1, //vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
9803 196,67,45,6,201,49, //vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
9804 196,67,61,6,195,49, //vperm2f128 $0x31,%ymm11,%ymm8,%ymm8
9805 196,65,125,17,36,128, //vmovupd %ymm12,(%r8,%rax,4)
9806 196,65,125,17,108,128,32, //vmovupd %ymm13,0x20(%r8,%rax,4)
9807 196,65,125,17,76,128,64, //vmovupd %ymm9,0x40(%r8,%rax,4)
9808 196,65,125,17,68,128,96, //vmovupd %ymm8,0x60(%r8,%rax,4)
9809 72,173, //lods %ds:(%rsi),%rax
9810 255,224, //jmpq *%rax
9811 196,65,121,17,20,128, //vmovupd %xmm10,(%r8,%rax,4)
9812 72,131,249,1, //cmp $0x1,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009813 116,240, //je 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009814 196,65,121,17,76,128,16, //vmovupd %xmm9,0x10(%r8,%rax,4)
9815 72,131,249,3, //cmp $0x3,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009816 114,227, //jb 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009817 196,65,121,17,68,128,32, //vmovupd %xmm8,0x20(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009818 116,218, //je 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009819 196,65,121,17,92,128,48, //vmovupd %xmm11,0x30(%r8,%rax,4)
9820 72,131,249,5, //cmp $0x5,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009821 114,205, //jb 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009822 196,67,125,25,84,128,64,1, //vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009823 116,195, //je 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009824 196,67,125,25,76,128,80,1, //vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
9825 72,131,249,7, //cmp $0x7,%rcx
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009826 114,181, //jb 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009827 196,67,125,25,68,128,96,1, //vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
Mike Kleinfdf3bbe2017-03-07 14:41:06 -05009828 235,171, //jmp 137f <_sk_store_f32_avx+0x69>
Mike Klein894d5612017-03-07 07:59:52 -05009829};
9830
9831CODE const uint8_t sk_clamp_x_avx[] = {
9832 72,173, //lods %ds:(%rsi),%rax
9833 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9834 197,60,95,200, //vmaxps %ymm0,%ymm8,%ymm9
9835 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9836 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
9837 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
9838 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
9839 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
9840 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
9841 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
9842 72,173, //lods %ds:(%rsi),%rax
9843 255,224, //jmpq *%rax
9844};
9845
9846CODE const uint8_t sk_clamp_y_avx[] = {
9847 72,173, //lods %ds:(%rsi),%rax
9848 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9849 197,60,95,201, //vmaxps %ymm1,%ymm8,%ymm9
9850 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9851 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
9852 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
9853 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
9854 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
9855 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
9856 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
9857 72,173, //lods %ds:(%rsi),%rax
9858 255,224, //jmpq *%rax
9859};
9860
9861CODE const uint8_t sk_repeat_x_avx[] = {
9862 72,173, //lods %ds:(%rsi),%rax
9863 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9864 196,65,124,94,200, //vdivps %ymm8,%ymm0,%ymm9
9865 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
9866 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
9867 196,65,124,92,201, //vsubps %ymm9,%ymm0,%ymm9
9868 196,99,125,25,192,1, //vextractf128 $0x1,%ymm8,%xmm0
9869 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
9870 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
9871 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
9872 196,227,61,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm8,%ymm0
9873 197,180,93,192, //vminps %ymm0,%ymm9,%ymm0
9874 72,173, //lods %ds:(%rsi),%rax
9875 255,224, //jmpq *%rax
9876};
9877
9878CODE const uint8_t sk_repeat_y_avx[] = {
9879 72,173, //lods %ds:(%rsi),%rax
9880 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9881 196,65,116,94,200, //vdivps %ymm8,%ymm1,%ymm9
9882 196,67,125,8,201,1, //vroundps $0x1,%ymm9,%ymm9
9883 196,65,52,89,200, //vmulps %ymm8,%ymm9,%ymm9
9884 196,65,116,92,201, //vsubps %ymm9,%ymm1,%ymm9
9885 196,99,125,25,193,1, //vextractf128 $0x1,%ymm8,%xmm1
9886 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
9887 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
9888 196,65,57,254,194, //vpaddd %xmm10,%xmm8,%xmm8
9889 196,227,61,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm8,%ymm1
9890 197,180,93,201, //vminps %ymm1,%ymm9,%ymm1
9891 72,173, //lods %ds:(%rsi),%rax
9892 255,224, //jmpq *%rax
9893};
9894
9895CODE const uint8_t sk_mirror_x_avx[] = {
9896 72,173, //lods %ds:(%rsi),%rax
9897 197,122,16,0, //vmovss (%rax),%xmm8
9898 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
9899 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
9900 196,65,124,92,209, //vsubps %ymm9,%ymm0,%ymm10
9901 196,193,58,88,192, //vaddss %xmm8,%xmm8,%xmm0
9902 196,227,121,4,192,0, //vpermilps $0x0,%xmm0,%xmm0
9903 196,227,125,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
9904 197,44,94,192, //vdivps %ymm0,%ymm10,%ymm8
9905 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
9906 197,188,89,192, //vmulps %ymm0,%ymm8,%ymm0
9907 197,172,92,192, //vsubps %ymm0,%ymm10,%ymm0
9908 196,193,124,92,193, //vsubps %ymm9,%ymm0,%ymm0
9909 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9910 197,60,92,192, //vsubps %ymm0,%ymm8,%ymm8
9911 197,60,84,192, //vandps %ymm0,%ymm8,%ymm8
9912 196,99,125,25,200,1, //vextractf128 $0x1,%ymm9,%xmm0
9913 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
9914 196,193,121,254,194, //vpaddd %xmm10,%xmm0,%xmm0
9915 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
9916 196,227,53,24,192,1, //vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
9917 197,188,93,192, //vminps %ymm0,%ymm8,%ymm0
9918 72,173, //lods %ds:(%rsi),%rax
9919 255,224, //jmpq *%rax
9920};
9921
9922CODE const uint8_t sk_mirror_y_avx[] = {
9923 72,173, //lods %ds:(%rsi),%rax
9924 197,122,16,0, //vmovss (%rax),%xmm8
9925 196,65,121,112,200,0, //vpshufd $0x0,%xmm8,%xmm9
9926 196,67,53,24,201,1, //vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
9927 196,65,116,92,209, //vsubps %ymm9,%ymm1,%ymm10
9928 196,193,58,88,200, //vaddss %xmm8,%xmm8,%xmm1
9929 196,227,121,4,201,0, //vpermilps $0x0,%xmm1,%xmm1
9930 196,227,117,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
9931 197,44,94,193, //vdivps %ymm1,%ymm10,%ymm8
9932 196,67,125,8,192,1, //vroundps $0x1,%ymm8,%ymm8
9933 197,188,89,201, //vmulps %ymm1,%ymm8,%ymm1
9934 197,172,92,201, //vsubps %ymm1,%ymm10,%ymm1
9935 196,193,116,92,201, //vsubps %ymm9,%ymm1,%ymm1
9936 196,65,60,87,192, //vxorps %ymm8,%ymm8,%ymm8
9937 197,60,92,193, //vsubps %ymm1,%ymm8,%ymm8
9938 197,60,84,193, //vandps %ymm1,%ymm8,%ymm8
9939 196,99,125,25,201,1, //vextractf128 $0x1,%ymm9,%xmm1
9940 196,65,41,118,210, //vpcmpeqd %xmm10,%xmm10,%xmm10
9941 196,193,113,254,202, //vpaddd %xmm10,%xmm1,%xmm1
9942 196,65,49,254,202, //vpaddd %xmm10,%xmm9,%xmm9
9943 196,227,53,24,201,1, //vinsertf128 $0x1,%xmm1,%ymm9,%ymm1
9944 197,188,93,201, //vminps %ymm1,%ymm8,%ymm1
9945 72,173, //lods %ds:(%rsi),%rax
9946 255,224, //jmpq *%rax
9947};
9948
Mike Kleine9ed07d2017-03-07 12:28:11 -05009949CODE const uint8_t sk_luminance_to_alpha_avx[] = {
9950 196,226,125,24,154,136,0,0,0, //vbroadcastss 0x88(%rdx),%ymm3
9951 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
9952 196,226,125,24,154,140,0,0,0, //vbroadcastss 0x8c(%rdx),%ymm3
9953 197,228,89,201, //vmulps %ymm1,%ymm3,%ymm1
9954 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
9955 196,226,125,24,138,144,0,0,0, //vbroadcastss 0x90(%rdx),%ymm1
9956 197,244,89,202, //vmulps %ymm2,%ymm1,%ymm1
9957 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
9958 72,173, //lods %ds:(%rsi),%rax
9959 197,252,87,192, //vxorps %ymm0,%ymm0,%ymm0
9960 197,244,87,201, //vxorps %ymm1,%ymm1,%ymm1
9961 197,236,87,210, //vxorps %ymm2,%ymm2,%ymm2
9962 255,224, //jmpq *%rax
9963};
9964
Mike Klein894d5612017-03-07 07:59:52 -05009965CODE const uint8_t sk_matrix_2x3_avx[] = {
9966 72,173, //lods %ds:(%rsi),%rax
9967 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9968 196,98,125,24,72,8, //vbroadcastss 0x8(%rax),%ymm9
9969 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
9970 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
9971 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
9972 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
9973 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
9974 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
9975 196,98,125,24,80,12, //vbroadcastss 0xc(%rax),%ymm10
9976 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
9977 197,172,89,201, //vmulps %ymm1,%ymm10,%ymm1
9978 196,193,116,88,203, //vaddps %ymm11,%ymm1,%ymm1
9979 197,180,89,192, //vmulps %ymm0,%ymm9,%ymm0
9980 197,252,88,201, //vaddps %ymm1,%ymm0,%ymm1
9981 72,173, //lods %ds:(%rsi),%rax
9982 197,124,41,192, //vmovaps %ymm8,%ymm0
9983 255,224, //jmpq *%rax
9984};
9985
9986CODE const uint8_t sk_matrix_3x4_avx[] = {
9987 72,173, //lods %ds:(%rsi),%rax
9988 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
9989 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
9990 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
9991 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
9992 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
9993 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
9994 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
9995 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
9996 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
9997 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
9998 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
9999 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
10000 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
10001 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
10002 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
10003 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10004 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
10005 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10006 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
10007 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10008 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
10009 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
10010 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
10011 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
10012 197,156,89,210, //vmulps %ymm2,%ymm12,%ymm2
10013 196,193,108,88,213, //vaddps %ymm13,%ymm2,%ymm2
10014 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
10015 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
10016 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
10017 197,252,88,209, //vaddps %ymm1,%ymm0,%ymm2
10018 72,173, //lods %ds:(%rsi),%rax
10019 197,124,41,192, //vmovaps %ymm8,%ymm0
10020 197,124,41,201, //vmovaps %ymm9,%ymm1
10021 255,224, //jmpq *%rax
10022};
10023
Mike Kleine9ed07d2017-03-07 12:28:11 -050010024CODE const uint8_t sk_matrix_4x5_avx[] = {
10025 72,173, //lods %ds:(%rsi),%rax
10026 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10027 196,98,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm9
10028 196,98,125,24,80,32, //vbroadcastss 0x20(%rax),%ymm10
10029 196,98,125,24,88,48, //vbroadcastss 0x30(%rax),%ymm11
10030 196,98,125,24,96,64, //vbroadcastss 0x40(%rax),%ymm12
10031 197,36,89,219, //vmulps %ymm3,%ymm11,%ymm11
10032 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10033 197,44,89,210, //vmulps %ymm2,%ymm10,%ymm10
10034 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10035 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
10036 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10037 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
10038 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
10039 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
10040 196,98,125,24,80,20, //vbroadcastss 0x14(%rax),%ymm10
10041 196,98,125,24,88,36, //vbroadcastss 0x24(%rax),%ymm11
10042 196,98,125,24,96,52, //vbroadcastss 0x34(%rax),%ymm12
10043 196,98,125,24,104,68, //vbroadcastss 0x44(%rax),%ymm13
10044 197,28,89,227, //vmulps %ymm3,%ymm12,%ymm12
10045 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
10046 197,36,89,218, //vmulps %ymm2,%ymm11,%ymm11
10047 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10048 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
10049 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10050 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
10051 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10052 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
10053 196,98,125,24,88,24, //vbroadcastss 0x18(%rax),%ymm11
10054 196,98,125,24,96,40, //vbroadcastss 0x28(%rax),%ymm12
10055 196,98,125,24,104,56, //vbroadcastss 0x38(%rax),%ymm13
10056 196,98,125,24,112,72, //vbroadcastss 0x48(%rax),%ymm14
10057 197,20,89,235, //vmulps %ymm3,%ymm13,%ymm13
10058 196,65,20,88,238, //vaddps %ymm14,%ymm13,%ymm13
10059 197,28,89,226, //vmulps %ymm2,%ymm12,%ymm12
10060 196,65,28,88,229, //vaddps %ymm13,%ymm12,%ymm12
10061 197,36,89,217, //vmulps %ymm1,%ymm11,%ymm11
10062 196,65,36,88,220, //vaddps %ymm12,%ymm11,%ymm11
10063 197,44,89,208, //vmulps %ymm0,%ymm10,%ymm10
10064 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10065 196,98,125,24,88,12, //vbroadcastss 0xc(%rax),%ymm11
10066 196,98,125,24,96,28, //vbroadcastss 0x1c(%rax),%ymm12
10067 196,98,125,24,104,44, //vbroadcastss 0x2c(%rax),%ymm13
10068 196,98,125,24,112,60, //vbroadcastss 0x3c(%rax),%ymm14
10069 196,98,125,24,120,76, //vbroadcastss 0x4c(%rax),%ymm15
10070 197,140,89,219, //vmulps %ymm3,%ymm14,%ymm3
10071 196,193,100,88,223, //vaddps %ymm15,%ymm3,%ymm3
10072 197,148,89,210, //vmulps %ymm2,%ymm13,%ymm2
10073 197,236,88,211, //vaddps %ymm3,%ymm2,%ymm2
10074 197,156,89,201, //vmulps %ymm1,%ymm12,%ymm1
10075 197,244,88,202, //vaddps %ymm2,%ymm1,%ymm1
10076 197,164,89,192, //vmulps %ymm0,%ymm11,%ymm0
10077 197,252,88,217, //vaddps %ymm1,%ymm0,%ymm3
10078 72,173, //lods %ds:(%rsi),%rax
10079 197,124,41,192, //vmovaps %ymm8,%ymm0
10080 197,124,41,201, //vmovaps %ymm9,%ymm1
10081 197,124,41,210, //vmovaps %ymm10,%ymm2
10082 255,224, //jmpq *%rax
10083};
10084
Mike Klein894d5612017-03-07 07:59:52 -050010085CODE const uint8_t sk_matrix_perspective_avx[] = {
10086 72,173, //lods %ds:(%rsi),%rax
10087 196,98,125,24,0, //vbroadcastss (%rax),%ymm8
10088 196,98,125,24,72,4, //vbroadcastss 0x4(%rax),%ymm9
10089 196,98,125,24,80,8, //vbroadcastss 0x8(%rax),%ymm10
10090 197,52,89,201, //vmulps %ymm1,%ymm9,%ymm9
10091 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10092 197,60,89,192, //vmulps %ymm0,%ymm8,%ymm8
10093 196,65,60,88,193, //vaddps %ymm9,%ymm8,%ymm8
10094 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
10095 196,98,125,24,80,16, //vbroadcastss 0x10(%rax),%ymm10
10096 196,98,125,24,88,20, //vbroadcastss 0x14(%rax),%ymm11
10097 197,44,89,209, //vmulps %ymm1,%ymm10,%ymm10
10098 196,65,44,88,211, //vaddps %ymm11,%ymm10,%ymm10
10099 197,52,89,200, //vmulps %ymm0,%ymm9,%ymm9
10100 196,65,52,88,202, //vaddps %ymm10,%ymm9,%ymm9
10101 196,98,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm10
10102 196,98,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm11
10103 196,98,125,24,96,32, //vbroadcastss 0x20(%rax),%ymm12
10104 197,164,89,201, //vmulps %ymm1,%ymm11,%ymm1
10105 196,193,116,88,204, //vaddps %ymm12,%ymm1,%ymm1
10106 197,172,89,192, //vmulps %ymm0,%ymm10,%ymm0
10107 197,252,88,193, //vaddps %ymm1,%ymm0,%ymm0
10108 197,252,83,200, //vrcpps %ymm0,%ymm1
10109 197,188,89,193, //vmulps %ymm1,%ymm8,%ymm0
10110 197,180,89,201, //vmulps %ymm1,%ymm9,%ymm1
10111 72,173, //lods %ds:(%rsi),%rax
10112 255,224, //jmpq *%rax
10113};
10114
10115CODE const uint8_t sk_linear_gradient_2stops_avx[] = {
10116 72,173, //lods %ds:(%rsi),%rax
10117 196,226,125,24,72,16, //vbroadcastss 0x10(%rax),%ymm1
10118 196,226,125,24,16, //vbroadcastss (%rax),%ymm2
10119 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
10120 197,108,88,193, //vaddps %ymm1,%ymm2,%ymm8
10121 196,226,125,24,72,20, //vbroadcastss 0x14(%rax),%ymm1
10122 196,226,125,24,80,4, //vbroadcastss 0x4(%rax),%ymm2
10123 197,244,89,200, //vmulps %ymm0,%ymm1,%ymm1
10124 197,236,88,201, //vaddps %ymm1,%ymm2,%ymm1
10125 196,226,125,24,80,24, //vbroadcastss 0x18(%rax),%ymm2
10126 196,226,125,24,88,8, //vbroadcastss 0x8(%rax),%ymm3
10127 197,236,89,208, //vmulps %ymm0,%ymm2,%ymm2
10128 197,228,88,210, //vaddps %ymm2,%ymm3,%ymm2
10129 196,226,125,24,88,28, //vbroadcastss 0x1c(%rax),%ymm3
10130 196,98,125,24,72,12, //vbroadcastss 0xc(%rax),%ymm9
10131 197,228,89,192, //vmulps %ymm0,%ymm3,%ymm0
10132 197,180,88,216, //vaddps %ymm0,%ymm9,%ymm3
10133 72,173, //lods %ds:(%rsi),%rax
10134 197,124,41,192, //vmovaps %ymm8,%ymm0
10135 255,224, //jmpq *%rax
10136};
10137
10138CODE const uint8_t sk_start_pipeline_sse41[] = {
10139 65,87, //push %r15
10140 65,86, //push %r14
10141 65,85, //push %r13
10142 65,84, //push %r12
10143 86, //push %rsi
10144 87, //push %rdi
10145 83, //push %rbx
10146 72,129,236,160,0,0,0, //sub $0xa0,%rsp
10147 68,15,41,188,36,144,0,0,0, //movaps %xmm15,0x90(%rsp)
10148 68,15,41,180,36,128,0,0,0, //movaps %xmm14,0x80(%rsp)
10149 68,15,41,108,36,112, //movaps %xmm13,0x70(%rsp)
10150 68,15,41,100,36,96, //movaps %xmm12,0x60(%rsp)
10151 68,15,41,92,36,80, //movaps %xmm11,0x50(%rsp)
10152 68,15,41,84,36,64, //movaps %xmm10,0x40(%rsp)
10153 68,15,41,76,36,48, //movaps %xmm9,0x30(%rsp)
10154 68,15,41,68,36,32, //movaps %xmm8,0x20(%rsp)
10155 15,41,124,36,16, //movaps %xmm7,0x10(%rsp)
10156 15,41,52,36, //movaps %xmm6,(%rsp)
10157 77,137,207, //mov %r9,%r15
10158 77,137,198, //mov %r8,%r14
10159 72,137,203, //mov %rcx,%rbx
10160 72,137,214, //mov %rdx,%rsi
10161 72,173, //lods %ds:(%rsi),%rax
10162 73,137,196, //mov %rax,%r12
10163 73,137,245, //mov %rsi,%r13
10164 72,141,67,4, //lea 0x4(%rbx),%rax
10165 76,57,248, //cmp %r15,%rax
10166 118,5, //jbe 73 <_sk_start_pipeline_sse41+0x73>
10167 72,137,216, //mov %rbx,%rax
10168 235,52, //jmp a7 <_sk_start_pipeline_sse41+0xa7>
10169 15,87,192, //xorps %xmm0,%xmm0
10170 15,87,201, //xorps %xmm1,%xmm1
10171 15,87,210, //xorps %xmm2,%xmm2
10172 15,87,219, //xorps %xmm3,%xmm3
10173 15,87,228, //xorps %xmm4,%xmm4
10174 15,87,237, //xorps %xmm5,%xmm5
10175 15,87,246, //xorps %xmm6,%xmm6
10176 15,87,255, //xorps %xmm7,%xmm7
10177 72,137,223, //mov %rbx,%rdi
10178 76,137,238, //mov %r13,%rsi
10179 76,137,242, //mov %r14,%rdx
10180 65,255,212, //callq *%r12
10181 72,141,67,4, //lea 0x4(%rbx),%rax
10182 72,131,195,8, //add $0x8,%rbx
10183 76,57,251, //cmp %r15,%rbx
10184 72,137,195, //mov %rax,%rbx
10185 118,204, //jbe 73 <_sk_start_pipeline_sse41+0x73>
10186 15,40,52,36, //movaps (%rsp),%xmm6
10187 15,40,124,36,16, //movaps 0x10(%rsp),%xmm7
10188 68,15,40,68,36,32, //movaps 0x20(%rsp),%xmm8
10189 68,15,40,76,36,48, //movaps 0x30(%rsp),%xmm9
10190 68,15,40,84,36,64, //movaps 0x40(%rsp),%xmm10
10191 68,15,40,92,36,80, //movaps 0x50(%rsp),%xmm11
10192 68,15,40,100,36,96, //movaps 0x60(%rsp),%xmm12
10193 68,15,40,108,36,112, //movaps 0x70(%rsp),%xmm13
10194 68,15,40,180,36,128,0,0,0, //movaps 0x80(%rsp),%xmm14
10195 68,15,40,188,36,144,0,0,0, //movaps 0x90(%rsp),%xmm15
10196 72,129,196,160,0,0,0, //add $0xa0,%rsp
10197 91, //pop %rbx
10198 95, //pop %rdi
10199 94, //pop %rsi
10200 65,92, //pop %r12
10201 65,93, //pop %r13
10202 65,94, //pop %r14
10203 65,95, //pop %r15
10204 195, //retq
10205};
10206
10207CODE const uint8_t sk_just_return_sse41[] = {
10208 195, //retq
10209};
10210
10211CODE const uint8_t sk_seed_shader_sse41[] = {
10212 72,173, //lods %ds:(%rsi),%rax
10213 102,15,110,199, //movd %edi,%xmm0
10214 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
10215 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010216 185,0,0,0,63, //mov $0x3f000000,%ecx
10217 102,15,110,209, //movd %ecx,%xmm2
10218 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10219 15,88,202, //addps %xmm2,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050010220 15,16,66,20, //movups 0x14(%rdx),%xmm0
10221 15,88,193, //addps %xmm1,%xmm0
10222 102,15,110,8, //movd (%rax),%xmm1
10223 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
10224 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010225 15,88,202, //addps %xmm2,%xmm1
10226 184,0,0,128,63, //mov $0x3f800000,%eax
10227 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010228 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10229 72,173, //lods %ds:(%rsi),%rax
10230 15,87,219, //xorps %xmm3,%xmm3
10231 15,87,228, //xorps %xmm4,%xmm4
10232 15,87,237, //xorps %xmm5,%xmm5
10233 15,87,246, //xorps %xmm6,%xmm6
10234 15,87,255, //xorps %xmm7,%xmm7
10235 255,224, //jmpq *%rax
10236};
10237
10238CODE const uint8_t sk_constant_color_sse41[] = {
10239 72,173, //lods %ds:(%rsi),%rax
10240 15,16,24, //movups (%rax),%xmm3
10241 15,40,195, //movaps %xmm3,%xmm0
10242 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
10243 15,40,203, //movaps %xmm3,%xmm1
10244 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
10245 15,40,211, //movaps %xmm3,%xmm2
10246 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
10247 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
10248 72,173, //lods %ds:(%rsi),%rax
10249 255,224, //jmpq *%rax
10250};
10251
10252CODE const uint8_t sk_clear_sse41[] = {
10253 72,173, //lods %ds:(%rsi),%rax
10254 15,87,192, //xorps %xmm0,%xmm0
10255 15,87,201, //xorps %xmm1,%xmm1
10256 15,87,210, //xorps %xmm2,%xmm2
10257 15,87,219, //xorps %xmm3,%xmm3
10258 255,224, //jmpq *%rax
10259};
10260
10261CODE const uint8_t sk_plus__sse41[] = {
10262 15,88,196, //addps %xmm4,%xmm0
10263 15,88,205, //addps %xmm5,%xmm1
10264 15,88,214, //addps %xmm6,%xmm2
10265 15,88,223, //addps %xmm7,%xmm3
10266 72,173, //lods %ds:(%rsi),%rax
10267 255,224, //jmpq *%rax
10268};
10269
10270CODE const uint8_t sk_srcover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010271 184,0,0,128,63, //mov $0x3f800000,%eax
10272 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010273 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10274 68,15,92,195, //subps %xmm3,%xmm8
10275 69,15,40,200, //movaps %xmm8,%xmm9
10276 68,15,89,204, //mulps %xmm4,%xmm9
10277 65,15,88,193, //addps %xmm9,%xmm0
10278 69,15,40,200, //movaps %xmm8,%xmm9
10279 68,15,89,205, //mulps %xmm5,%xmm9
10280 65,15,88,201, //addps %xmm9,%xmm1
10281 69,15,40,200, //movaps %xmm8,%xmm9
10282 68,15,89,206, //mulps %xmm6,%xmm9
10283 65,15,88,209, //addps %xmm9,%xmm2
10284 68,15,89,199, //mulps %xmm7,%xmm8
10285 65,15,88,216, //addps %xmm8,%xmm3
10286 72,173, //lods %ds:(%rsi),%rax
10287 255,224, //jmpq *%rax
10288};
10289
10290CODE const uint8_t sk_dstover_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010291 184,0,0,128,63, //mov $0x3f800000,%eax
10292 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010293 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10294 68,15,92,199, //subps %xmm7,%xmm8
10295 65,15,89,192, //mulps %xmm8,%xmm0
10296 15,88,196, //addps %xmm4,%xmm0
10297 65,15,89,200, //mulps %xmm8,%xmm1
10298 15,88,205, //addps %xmm5,%xmm1
10299 65,15,89,208, //mulps %xmm8,%xmm2
10300 15,88,214, //addps %xmm6,%xmm2
10301 65,15,89,216, //mulps %xmm8,%xmm3
10302 15,88,223, //addps %xmm7,%xmm3
10303 72,173, //lods %ds:(%rsi),%rax
10304 255,224, //jmpq *%rax
10305};
10306
10307CODE const uint8_t sk_clamp_0_sse41[] = {
10308 69,15,87,192, //xorps %xmm8,%xmm8
10309 65,15,95,192, //maxps %xmm8,%xmm0
10310 65,15,95,200, //maxps %xmm8,%xmm1
10311 65,15,95,208, //maxps %xmm8,%xmm2
10312 65,15,95,216, //maxps %xmm8,%xmm3
10313 72,173, //lods %ds:(%rsi),%rax
10314 255,224, //jmpq *%rax
10315};
10316
10317CODE const uint8_t sk_clamp_1_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010318 184,0,0,128,63, //mov $0x3f800000,%eax
10319 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010320 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10321 65,15,93,192, //minps %xmm8,%xmm0
10322 65,15,93,200, //minps %xmm8,%xmm1
10323 65,15,93,208, //minps %xmm8,%xmm2
10324 65,15,93,216, //minps %xmm8,%xmm3
10325 72,173, //lods %ds:(%rsi),%rax
10326 255,224, //jmpq *%rax
10327};
10328
10329CODE const uint8_t sk_clamp_a_sse41[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010330 184,0,0,128,63, //mov $0x3f800000,%eax
10331 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010332 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10333 65,15,93,216, //minps %xmm8,%xmm3
10334 15,93,195, //minps %xmm3,%xmm0
10335 15,93,203, //minps %xmm3,%xmm1
10336 15,93,211, //minps %xmm3,%xmm2
10337 72,173, //lods %ds:(%rsi),%rax
10338 255,224, //jmpq *%rax
10339};
10340
10341CODE const uint8_t sk_set_rgb_sse41[] = {
10342 72,173, //lods %ds:(%rsi),%rax
10343 243,15,16,0, //movss (%rax),%xmm0
10344 243,15,16,72,4, //movss 0x4(%rax),%xmm1
10345 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
10346 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
10347 243,15,16,80,8, //movss 0x8(%rax),%xmm2
10348 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10349 72,173, //lods %ds:(%rsi),%rax
10350 255,224, //jmpq *%rax
10351};
10352
10353CODE const uint8_t sk_swap_rb_sse41[] = {
10354 68,15,40,192, //movaps %xmm0,%xmm8
10355 72,173, //lods %ds:(%rsi),%rax
10356 15,40,194, //movaps %xmm2,%xmm0
10357 65,15,40,208, //movaps %xmm8,%xmm2
10358 255,224, //jmpq *%rax
10359};
10360
10361CODE const uint8_t sk_swap_sse41[] = {
10362 68,15,40,195, //movaps %xmm3,%xmm8
10363 68,15,40,202, //movaps %xmm2,%xmm9
10364 68,15,40,209, //movaps %xmm1,%xmm10
10365 68,15,40,216, //movaps %xmm0,%xmm11
10366 72,173, //lods %ds:(%rsi),%rax
10367 15,40,196, //movaps %xmm4,%xmm0
10368 15,40,205, //movaps %xmm5,%xmm1
10369 15,40,214, //movaps %xmm6,%xmm2
10370 15,40,223, //movaps %xmm7,%xmm3
10371 65,15,40,227, //movaps %xmm11,%xmm4
10372 65,15,40,234, //movaps %xmm10,%xmm5
10373 65,15,40,241, //movaps %xmm9,%xmm6
10374 65,15,40,248, //movaps %xmm8,%xmm7
10375 255,224, //jmpq *%rax
10376};
10377
10378CODE const uint8_t sk_move_src_dst_sse41[] = {
10379 72,173, //lods %ds:(%rsi),%rax
10380 15,40,224, //movaps %xmm0,%xmm4
10381 15,40,233, //movaps %xmm1,%xmm5
10382 15,40,242, //movaps %xmm2,%xmm6
10383 15,40,251, //movaps %xmm3,%xmm7
10384 255,224, //jmpq *%rax
10385};
10386
10387CODE const uint8_t sk_move_dst_src_sse41[] = {
10388 72,173, //lods %ds:(%rsi),%rax
10389 15,40,196, //movaps %xmm4,%xmm0
10390 15,40,205, //movaps %xmm5,%xmm1
10391 15,40,214, //movaps %xmm6,%xmm2
10392 15,40,223, //movaps %xmm7,%xmm3
10393 255,224, //jmpq *%rax
10394};
10395
10396CODE const uint8_t sk_premul_sse41[] = {
10397 15,89,195, //mulps %xmm3,%xmm0
10398 15,89,203, //mulps %xmm3,%xmm1
10399 15,89,211, //mulps %xmm3,%xmm2
10400 72,173, //lods %ds:(%rsi),%rax
10401 255,224, //jmpq *%rax
10402};
10403
10404CODE const uint8_t sk_unpremul_sse41[] = {
10405 68,15,40,192, //movaps %xmm0,%xmm8
10406 69,15,87,201, //xorps %xmm9,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010407 184,0,0,128,63, //mov $0x3f800000,%eax
10408 102,68,15,110,208, //movd %eax,%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050010409 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
10410 68,15,94,211, //divps %xmm3,%xmm10
10411 15,40,195, //movaps %xmm3,%xmm0
10412 65,15,194,193,0, //cmpeqps %xmm9,%xmm0
10413 102,69,15,56,20,209, //blendvps %xmm0,%xmm9,%xmm10
10414 69,15,89,194, //mulps %xmm10,%xmm8
10415 65,15,89,202, //mulps %xmm10,%xmm1
10416 65,15,89,210, //mulps %xmm10,%xmm2
10417 72,173, //lods %ds:(%rsi),%rax
10418 65,15,40,192, //movaps %xmm8,%xmm0
10419 255,224, //jmpq *%rax
10420};
10421
10422CODE const uint8_t sk_from_srgb_sse41[] = {
10423 68,15,40,194, //movaps %xmm2,%xmm8
10424 243,68,15,16,90,64, //movss 0x40(%rdx),%xmm11
10425 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
10426 69,15,40,211, //movaps %xmm11,%xmm10
10427 68,15,89,208, //mulps %xmm0,%xmm10
10428 68,15,40,240, //movaps %xmm0,%xmm14
10429 69,15,89,246, //mulps %xmm14,%xmm14
10430 243,15,16,82,60, //movss 0x3c(%rdx),%xmm2
10431 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10432 243,68,15,16,98,52, //movss 0x34(%rdx),%xmm12
10433 243,68,15,16,106,56, //movss 0x38(%rdx),%xmm13
10434 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
10435 68,15,40,202, //movaps %xmm2,%xmm9
10436 68,15,89,200, //mulps %xmm0,%xmm9
10437 69,15,88,205, //addps %xmm13,%xmm9
10438 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
10439 69,15,89,206, //mulps %xmm14,%xmm9
10440 69,15,88,204, //addps %xmm12,%xmm9
10441 243,68,15,16,114,68, //movss 0x44(%rdx),%xmm14
10442 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
10443 65,15,194,198,1, //cmpltps %xmm14,%xmm0
10444 102,69,15,56,20,202, //blendvps %xmm0,%xmm10,%xmm9
10445 69,15,40,251, //movaps %xmm11,%xmm15
10446 68,15,89,249, //mulps %xmm1,%xmm15
10447 15,40,193, //movaps %xmm1,%xmm0
10448 15,89,192, //mulps %xmm0,%xmm0
10449 68,15,40,210, //movaps %xmm2,%xmm10
10450 68,15,89,209, //mulps %xmm1,%xmm10
10451 69,15,88,213, //addps %xmm13,%xmm10
10452 68,15,89,208, //mulps %xmm0,%xmm10
10453 69,15,88,212, //addps %xmm12,%xmm10
10454 65,15,194,206,1, //cmpltps %xmm14,%xmm1
10455 15,40,193, //movaps %xmm1,%xmm0
10456 102,69,15,56,20,215, //blendvps %xmm0,%xmm15,%xmm10
10457 69,15,89,216, //mulps %xmm8,%xmm11
10458 65,15,40,192, //movaps %xmm8,%xmm0
10459 15,89,192, //mulps %xmm0,%xmm0
10460 65,15,89,208, //mulps %xmm8,%xmm2
10461 65,15,88,213, //addps %xmm13,%xmm2
10462 15,89,208, //mulps %xmm0,%xmm2
10463 65,15,88,212, //addps %xmm12,%xmm2
10464 69,15,194,198,1, //cmpltps %xmm14,%xmm8
10465 65,15,40,192, //movaps %xmm8,%xmm0
10466 102,65,15,56,20,211, //blendvps %xmm0,%xmm11,%xmm2
10467 72,173, //lods %ds:(%rsi),%rax
10468 65,15,40,193, //movaps %xmm9,%xmm0
10469 65,15,40,202, //movaps %xmm10,%xmm1
10470 255,224, //jmpq *%rax
10471};
10472
10473CODE const uint8_t sk_to_srgb_sse41[] = {
10474 72,131,236,24, //sub $0x18,%rsp
10475 15,41,60,36, //movaps %xmm7,(%rsp)
10476 15,40,254, //movaps %xmm6,%xmm7
10477 15,40,245, //movaps %xmm5,%xmm6
10478 15,40,236, //movaps %xmm4,%xmm5
10479 15,40,227, //movaps %xmm3,%xmm4
10480 68,15,40,194, //movaps %xmm2,%xmm8
10481 15,40,217, //movaps %xmm1,%xmm3
10482 15,82,208, //rsqrtps %xmm0,%xmm2
10483 68,15,83,202, //rcpps %xmm2,%xmm9
10484 68,15,82,210, //rsqrtps %xmm2,%xmm10
10485 243,15,16,18, //movss (%rdx),%xmm2
10486 243,68,15,16,90,72, //movss 0x48(%rdx),%xmm11
10487 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
10488 65,15,40,203, //movaps %xmm11,%xmm1
10489 15,89,200, //mulps %xmm0,%xmm1
10490 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10491 243,68,15,16,98,76, //movss 0x4c(%rdx),%xmm12
10492 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
10493 243,68,15,16,106,80, //movss 0x50(%rdx),%xmm13
10494 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
10495 243,68,15,16,114,84, //movss 0x54(%rdx),%xmm14
10496 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
10497 69,15,89,205, //mulps %xmm13,%xmm9
10498 69,15,88,206, //addps %xmm14,%xmm9
10499 69,15,89,212, //mulps %xmm12,%xmm10
10500 69,15,88,209, //addps %xmm9,%xmm10
10501 68,15,40,202, //movaps %xmm2,%xmm9
10502 69,15,93,202, //minps %xmm10,%xmm9
10503 243,68,15,16,122,88, //movss 0x58(%rdx),%xmm15
10504 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
10505 65,15,194,199,1, //cmpltps %xmm15,%xmm0
10506 102,68,15,56,20,201, //blendvps %xmm0,%xmm1,%xmm9
10507 15,82,195, //rsqrtps %xmm3,%xmm0
10508 15,83,200, //rcpps %xmm0,%xmm1
10509 15,82,192, //rsqrtps %xmm0,%xmm0
10510 65,15,89,205, //mulps %xmm13,%xmm1
10511 65,15,88,206, //addps %xmm14,%xmm1
10512 65,15,89,196, //mulps %xmm12,%xmm0
10513 15,88,193, //addps %xmm1,%xmm0
10514 68,15,40,210, //movaps %xmm2,%xmm10
10515 68,15,93,208, //minps %xmm0,%xmm10
10516 65,15,40,203, //movaps %xmm11,%xmm1
10517 15,89,203, //mulps %xmm3,%xmm1
10518 65,15,194,223,1, //cmpltps %xmm15,%xmm3
10519 15,40,195, //movaps %xmm3,%xmm0
10520 102,68,15,56,20,209, //blendvps %xmm0,%xmm1,%xmm10
10521 65,15,82,192, //rsqrtps %xmm8,%xmm0
10522 15,83,200, //rcpps %xmm0,%xmm1
10523 65,15,89,205, //mulps %xmm13,%xmm1
10524 65,15,88,206, //addps %xmm14,%xmm1
10525 15,82,192, //rsqrtps %xmm0,%xmm0
10526 65,15,89,196, //mulps %xmm12,%xmm0
10527 15,88,193, //addps %xmm1,%xmm0
10528 15,93,208, //minps %xmm0,%xmm2
10529 69,15,89,216, //mulps %xmm8,%xmm11
10530 69,15,194,199,1, //cmpltps %xmm15,%xmm8
10531 65,15,40,192, //movaps %xmm8,%xmm0
10532 102,65,15,56,20,211, //blendvps %xmm0,%xmm11,%xmm2
10533 72,173, //lods %ds:(%rsi),%rax
10534 65,15,40,193, //movaps %xmm9,%xmm0
10535 65,15,40,202, //movaps %xmm10,%xmm1
10536 15,40,220, //movaps %xmm4,%xmm3
10537 15,40,229, //movaps %xmm5,%xmm4
10538 15,40,238, //movaps %xmm6,%xmm5
10539 15,40,247, //movaps %xmm7,%xmm6
10540 15,40,60,36, //movaps (%rsp),%xmm7
10541 72,131,196,24, //add $0x18,%rsp
10542 255,224, //jmpq *%rax
10543};
10544
10545CODE const uint8_t sk_scale_1_float_sse41[] = {
10546 72,173, //lods %ds:(%rsi),%rax
10547 243,68,15,16,0, //movss (%rax),%xmm8
10548 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10549 65,15,89,192, //mulps %xmm8,%xmm0
10550 65,15,89,200, //mulps %xmm8,%xmm1
10551 65,15,89,208, //mulps %xmm8,%xmm2
10552 65,15,89,216, //mulps %xmm8,%xmm3
10553 72,173, //lods %ds:(%rsi),%rax
10554 255,224, //jmpq *%rax
10555};
10556
10557CODE const uint8_t sk_scale_u8_sse41[] = {
10558 72,173, //lods %ds:(%rsi),%rax
10559 72,139,0, //mov (%rax),%rax
10560 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
10561 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010562 184,129,128,128,59, //mov $0x3b808081,%eax
10563 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050010564 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
10565 69,15,89,200, //mulps %xmm8,%xmm9
10566 65,15,89,193, //mulps %xmm9,%xmm0
10567 65,15,89,201, //mulps %xmm9,%xmm1
10568 65,15,89,209, //mulps %xmm9,%xmm2
10569 65,15,89,217, //mulps %xmm9,%xmm3
10570 72,173, //lods %ds:(%rsi),%rax
10571 255,224, //jmpq *%rax
10572};
10573
10574CODE const uint8_t sk_lerp_1_float_sse41[] = {
10575 72,173, //lods %ds:(%rsi),%rax
10576 243,68,15,16,0, //movss (%rax),%xmm8
10577 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10578 15,92,196, //subps %xmm4,%xmm0
10579 65,15,89,192, //mulps %xmm8,%xmm0
10580 15,88,196, //addps %xmm4,%xmm0
10581 15,92,205, //subps %xmm5,%xmm1
10582 65,15,89,200, //mulps %xmm8,%xmm1
10583 15,88,205, //addps %xmm5,%xmm1
10584 15,92,214, //subps %xmm6,%xmm2
10585 65,15,89,208, //mulps %xmm8,%xmm2
10586 15,88,214, //addps %xmm6,%xmm2
10587 15,92,223, //subps %xmm7,%xmm3
10588 65,15,89,216, //mulps %xmm8,%xmm3
10589 15,88,223, //addps %xmm7,%xmm3
10590 72,173, //lods %ds:(%rsi),%rax
10591 255,224, //jmpq *%rax
10592};
10593
10594CODE const uint8_t sk_lerp_u8_sse41[] = {
10595 72,173, //lods %ds:(%rsi),%rax
10596 72,139,0, //mov (%rax),%rax
10597 102,68,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm8
10598 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010599 184,129,128,128,59, //mov $0x3b808081,%eax
10600 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050010601 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
10602 69,15,89,200, //mulps %xmm8,%xmm9
10603 15,92,196, //subps %xmm4,%xmm0
10604 65,15,89,193, //mulps %xmm9,%xmm0
10605 15,88,196, //addps %xmm4,%xmm0
10606 15,92,205, //subps %xmm5,%xmm1
10607 65,15,89,201, //mulps %xmm9,%xmm1
10608 15,88,205, //addps %xmm5,%xmm1
10609 15,92,214, //subps %xmm6,%xmm2
10610 65,15,89,209, //mulps %xmm9,%xmm2
10611 15,88,214, //addps %xmm6,%xmm2
10612 15,92,223, //subps %xmm7,%xmm3
10613 65,15,89,217, //mulps %xmm9,%xmm3
10614 15,88,223, //addps %xmm7,%xmm3
10615 72,173, //lods %ds:(%rsi),%rax
10616 255,224, //jmpq *%rax
10617};
10618
10619CODE const uint8_t sk_lerp_565_sse41[] = {
10620 72,173, //lods %ds:(%rsi),%rax
10621 72,139,0, //mov (%rax),%rax
10622 102,68,15,56,51,4,120, //pmovzxwd (%rax,%rdi,2),%xmm8
10623 102,15,110,90,104, //movd 0x68(%rdx),%xmm3
10624 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
10625 102,65,15,219,216, //pand %xmm8,%xmm3
10626 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010627 243,68,15,16,90,116, //movss 0x74(%rdx),%xmm11
10628 243,68,15,16,82,120, //movss 0x78(%rdx),%xmm10
Mike Klein894d5612017-03-07 07:59:52 -050010629 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
10630 69,15,89,217, //mulps %xmm9,%xmm11
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010631 102,15,110,90,108, //movd 0x6c(%rdx),%xmm3
10632 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
10633 102,65,15,219,216, //pand %xmm8,%xmm3
10634 15,91,219, //cvtdq2ps %xmm3,%xmm3
10635 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
10636 68,15,89,211, //mulps %xmm3,%xmm10
10637 102,15,110,90,112, //movd 0x70(%rdx),%xmm3
10638 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
10639 102,65,15,219,216, //pand %xmm8,%xmm3
10640 68,15,91,195, //cvtdq2ps %xmm3,%xmm8
10641 243,15,16,90,124, //movss 0x7c(%rdx),%xmm3
10642 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
10643 65,15,89,216, //mulps %xmm8,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050010644 15,92,196, //subps %xmm4,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010645 65,15,89,195, //mulps %xmm11,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050010646 15,88,196, //addps %xmm4,%xmm0
10647 15,92,205, //subps %xmm5,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010648 65,15,89,202, //mulps %xmm10,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050010649 15,88,205, //addps %xmm5,%xmm1
10650 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010651 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050010652 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010653 184,0,0,128,63, //mov $0x3f800000,%eax
10654 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050010655 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
10656 72,173, //lods %ds:(%rsi),%rax
10657 255,224, //jmpq *%rax
10658};
10659
10660CODE const uint8_t sk_load_tables_sse41[] = {
10661 72,173, //lods %ds:(%rsi),%rax
10662 72,139,8, //mov (%rax),%rcx
10663 76,139,64,8, //mov 0x8(%rax),%r8
10664 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
10665 102,15,110,66,16, //movd 0x10(%rdx),%xmm0
10666 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
10667 102,65,15,111,200, //movdqa %xmm8,%xmm1
10668 102,15,114,209,8, //psrld $0x8,%xmm1
10669 102,15,219,200, //pand %xmm0,%xmm1
10670 102,65,15,111,208, //movdqa %xmm8,%xmm2
10671 102,15,114,210,16, //psrld $0x10,%xmm2
10672 102,15,219,208, //pand %xmm0,%xmm2
10673 102,65,15,219,192, //pand %xmm8,%xmm0
10674 102,72,15,58,22,193,1, //pextrq $0x1,%xmm0,%rcx
10675 65,137,201, //mov %ecx,%r9d
10676 72,193,233,32, //shr $0x20,%rcx
10677 102,73,15,126,194, //movq %xmm0,%r10
10678 69,137,211, //mov %r10d,%r11d
10679 73,193,234,32, //shr $0x20,%r10
10680 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
10681 102,67,15,58,33,4,144,16, //insertps $0x10,(%r8,%r10,4),%xmm0
10682 102,67,15,58,33,4,136,32, //insertps $0x20,(%r8,%r9,4),%xmm0
10683 102,65,15,58,33,4,136,48, //insertps $0x30,(%r8,%rcx,4),%xmm0
10684 72,139,72,16, //mov 0x10(%rax),%rcx
10685 102,73,15,58,22,200,1, //pextrq $0x1,%xmm1,%r8
10686 69,137,193, //mov %r8d,%r9d
10687 73,193,232,32, //shr $0x20,%r8
10688 102,73,15,126,202, //movq %xmm1,%r10
10689 69,137,211, //mov %r10d,%r11d
10690 73,193,234,32, //shr $0x20,%r10
10691 243,66,15,16,12,153, //movss (%rcx,%r11,4),%xmm1
10692 102,66,15,58,33,12,145,16, //insertps $0x10,(%rcx,%r10,4),%xmm1
10693 243,66,15,16,28,137, //movss (%rcx,%r9,4),%xmm3
10694 102,15,58,33,203,32, //insertps $0x20,%xmm3,%xmm1
10695 243,66,15,16,28,129, //movss (%rcx,%r8,4),%xmm3
10696 102,15,58,33,203,48, //insertps $0x30,%xmm3,%xmm1
10697 72,139,64,24, //mov 0x18(%rax),%rax
10698 102,72,15,58,22,209,1, //pextrq $0x1,%xmm2,%rcx
10699 65,137,200, //mov %ecx,%r8d
10700 72,193,233,32, //shr $0x20,%rcx
10701 102,73,15,126,209, //movq %xmm2,%r9
10702 69,137,202, //mov %r9d,%r10d
10703 73,193,233,32, //shr $0x20,%r9
10704 243,66,15,16,20,144, //movss (%rax,%r10,4),%xmm2
10705 102,66,15,58,33,20,136,16, //insertps $0x10,(%rax,%r9,4),%xmm2
10706 243,66,15,16,28,128, //movss (%rax,%r8,4),%xmm3
10707 102,15,58,33,211,32, //insertps $0x20,%xmm3,%xmm2
10708 243,15,16,28,136, //movss (%rax,%rcx,4),%xmm3
10709 102,15,58,33,211,48, //insertps $0x30,%xmm3,%xmm2
10710 102,65,15,114,208,24, //psrld $0x18,%xmm8
10711 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
10712 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
10713 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
10714 65,15,89,216, //mulps %xmm8,%xmm3
10715 72,173, //lods %ds:(%rsi),%rax
10716 255,224, //jmpq *%rax
10717};
10718
10719CODE const uint8_t sk_load_a8_sse41[] = {
10720 72,173, //lods %ds:(%rsi),%rax
10721 72,139,0, //mov (%rax),%rax
10722 102,15,56,49,4,56, //pmovzxbd (%rax,%rdi,1),%xmm0
10723 15,91,192, //cvtdq2ps %xmm0,%xmm0
10724 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
10725 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
10726 15,89,216, //mulps %xmm0,%xmm3
10727 72,173, //lods %ds:(%rsi),%rax
10728 15,87,192, //xorps %xmm0,%xmm0
10729 15,87,201, //xorps %xmm1,%xmm1
10730 15,87,210, //xorps %xmm2,%xmm2
10731 255,224, //jmpq *%rax
10732};
10733
10734CODE const uint8_t sk_store_a8_sse41[] = {
10735 72,173, //lods %ds:(%rsi),%rax
10736 72,139,0, //mov (%rax),%rax
10737 243,68,15,16,66,8, //movss 0x8(%rdx),%xmm8
10738 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10739 68,15,89,195, //mulps %xmm3,%xmm8
10740 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
10741 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
10742 102,69,15,103,192, //packuswb %xmm8,%xmm8
10743 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
10744 72,173, //lods %ds:(%rsi),%rax
10745 255,224, //jmpq *%rax
10746};
10747
10748CODE const uint8_t sk_load_565_sse41[] = {
10749 72,173, //lods %ds:(%rsi),%rax
10750 72,139,0, //mov (%rax),%rax
10751 102,68,15,56,51,12,120, //pmovzxwd (%rax,%rdi,2),%xmm9
10752 102,15,110,66,104, //movd 0x68(%rdx),%xmm0
10753 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
10754 102,65,15,219,193, //pand %xmm9,%xmm0
10755 15,91,200, //cvtdq2ps %xmm0,%xmm1
10756 243,15,16,26, //movss (%rdx),%xmm3
10757 243,15,16,66,116, //movss 0x74(%rdx),%xmm0
10758 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
10759 15,89,193, //mulps %xmm1,%xmm0
10760 102,15,110,74,108, //movd 0x6c(%rdx),%xmm1
10761 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
10762 102,65,15,219,201, //pand %xmm9,%xmm1
10763 68,15,91,193, //cvtdq2ps %xmm1,%xmm8
10764 243,15,16,74,120, //movss 0x78(%rdx),%xmm1
10765 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
10766 65,15,89,200, //mulps %xmm8,%xmm1
10767 102,15,110,82,112, //movd 0x70(%rdx),%xmm2
10768 102,15,112,210,0, //pshufd $0x0,%xmm2,%xmm2
10769 102,65,15,219,209, //pand %xmm9,%xmm2
10770 68,15,91,194, //cvtdq2ps %xmm2,%xmm8
10771 243,15,16,82,124, //movss 0x7c(%rdx),%xmm2
10772 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
10773 65,15,89,208, //mulps %xmm8,%xmm2
10774 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
10775 72,173, //lods %ds:(%rsi),%rax
10776 255,224, //jmpq *%rax
10777};
10778
10779CODE const uint8_t sk_store_565_sse41[] = {
10780 72,173, //lods %ds:(%rsi),%rax
10781 72,139,0, //mov (%rax),%rax
10782 243,68,15,16,130,128,0,0,0, //movss 0x80(%rdx),%xmm8
10783 243,68,15,16,138,132,0,0,0, //movss 0x84(%rdx),%xmm9
10784 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10785 69,15,40,208, //movaps %xmm8,%xmm10
10786 68,15,89,208, //mulps %xmm0,%xmm10
10787 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
10788 102,65,15,114,242,11, //pslld $0xb,%xmm10
10789 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
10790 68,15,89,201, //mulps %xmm1,%xmm9
10791 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
10792 102,65,15,114,241,5, //pslld $0x5,%xmm9
10793 102,69,15,235,202, //por %xmm10,%xmm9
10794 68,15,89,194, //mulps %xmm2,%xmm8
10795 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
10796 102,69,15,86,193, //orpd %xmm9,%xmm8
10797 102,69,15,56,43,192, //packusdw %xmm8,%xmm8
10798 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
10799 72,173, //lods %ds:(%rsi),%rax
10800 255,224, //jmpq *%rax
10801};
10802
10803CODE const uint8_t sk_load_8888_sse41[] = {
10804 72,173, //lods %ds:(%rsi),%rax
10805 72,139,0, //mov (%rax),%rax
10806 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010807 184,255,0,0,0, //mov $0xff,%eax
10808 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050010809 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
10810 102,15,111,203, //movdqa %xmm3,%xmm1
10811 102,15,114,209,8, //psrld $0x8,%xmm1
10812 102,15,219,200, //pand %xmm0,%xmm1
10813 102,15,111,211, //movdqa %xmm3,%xmm2
10814 102,15,114,210,16, //psrld $0x10,%xmm2
10815 102,15,219,208, //pand %xmm0,%xmm2
10816 102,15,219,195, //pand %xmm3,%xmm0
10817 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010818 184,129,128,128,59, //mov $0x3b808081,%eax
10819 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010820 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10821 65,15,89,192, //mulps %xmm8,%xmm0
10822 15,91,201, //cvtdq2ps %xmm1,%xmm1
10823 65,15,89,200, //mulps %xmm8,%xmm1
10824 15,91,210, //cvtdq2ps %xmm2,%xmm2
10825 65,15,89,208, //mulps %xmm8,%xmm2
10826 102,15,114,211,24, //psrld $0x18,%xmm3
10827 15,91,219, //cvtdq2ps %xmm3,%xmm3
10828 65,15,89,216, //mulps %xmm8,%xmm3
10829 72,173, //lods %ds:(%rsi),%rax
10830 255,224, //jmpq *%rax
10831};
10832
10833CODE const uint8_t sk_store_8888_sse41[] = {
10834 72,173, //lods %ds:(%rsi),%rax
10835 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050010836 185,0,0,127,67, //mov $0x437f0000,%ecx
10837 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050010838 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10839 69,15,40,200, //movaps %xmm8,%xmm9
10840 68,15,89,200, //mulps %xmm0,%xmm9
10841 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
10842 69,15,40,208, //movaps %xmm8,%xmm10
10843 68,15,89,209, //mulps %xmm1,%xmm10
10844 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
10845 102,65,15,114,242,8, //pslld $0x8,%xmm10
10846 102,69,15,235,209, //por %xmm9,%xmm10
10847 69,15,40,200, //movaps %xmm8,%xmm9
10848 68,15,89,202, //mulps %xmm2,%xmm9
10849 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
10850 102,65,15,114,241,16, //pslld $0x10,%xmm9
10851 68,15,89,195, //mulps %xmm3,%xmm8
10852 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
10853 102,65,15,114,240,24, //pslld $0x18,%xmm8
10854 102,69,15,235,193, //por %xmm9,%xmm8
10855 102,69,15,235,194, //por %xmm10,%xmm8
10856 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
10857 72,173, //lods %ds:(%rsi),%rax
10858 255,224, //jmpq *%rax
10859};
10860
10861CODE const uint8_t sk_load_f16_sse41[] = {
10862 72,173, //lods %ds:(%rsi),%rax
10863 72,139,0, //mov (%rax),%rax
10864 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
10865 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
10866 102,15,111,208, //movdqa %xmm0,%xmm2
10867 102,15,97,209, //punpcklwd %xmm1,%xmm2
10868 102,15,105,193, //punpckhwd %xmm1,%xmm0
10869 102,68,15,111,194, //movdqa %xmm2,%xmm8
10870 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
10871 102,15,105,208, //punpckhwd %xmm0,%xmm2
10872 102,15,110,66,100, //movd 0x64(%rdx),%xmm0
10873 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
10874 102,15,111,203, //movdqa %xmm3,%xmm1
10875 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
10876 102,65,15,223,200, //pandn %xmm8,%xmm1
10877 102,15,101,218, //pcmpgtw %xmm2,%xmm3
10878 102,15,223,218, //pandn %xmm2,%xmm3
10879 102,15,56,51,193, //pmovzxwd %xmm1,%xmm0
10880 102,15,114,240,13, //pslld $0xd,%xmm0
10881 102,15,110,82,92, //movd 0x5c(%rdx),%xmm2
10882 102,68,15,112,194,0, //pshufd $0x0,%xmm2,%xmm8
10883 65,15,89,192, //mulps %xmm8,%xmm0
10884 102,69,15,239,201, //pxor %xmm9,%xmm9
10885 102,65,15,105,201, //punpckhwd %xmm9,%xmm1
10886 102,15,114,241,13, //pslld $0xd,%xmm1
10887 65,15,89,200, //mulps %xmm8,%xmm1
10888 102,15,56,51,211, //pmovzxwd %xmm3,%xmm2
10889 102,15,114,242,13, //pslld $0xd,%xmm2
10890 65,15,89,208, //mulps %xmm8,%xmm2
10891 102,65,15,105,217, //punpckhwd %xmm9,%xmm3
10892 102,15,114,243,13, //pslld $0xd,%xmm3
10893 65,15,89,216, //mulps %xmm8,%xmm3
10894 72,173, //lods %ds:(%rsi),%rax
10895 255,224, //jmpq *%rax
10896};
10897
10898CODE const uint8_t sk_store_f16_sse41[] = {
10899 72,173, //lods %ds:(%rsi),%rax
10900 72,139,0, //mov (%rax),%rax
10901 102,68,15,110,66,96, //movd 0x60(%rdx),%xmm8
10902 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
10903 102,69,15,111,200, //movdqa %xmm8,%xmm9
10904 68,15,89,200, //mulps %xmm0,%xmm9
10905 102,65,15,114,209,13, //psrld $0xd,%xmm9
10906 102,69,15,111,208, //movdqa %xmm8,%xmm10
10907 68,15,89,209, //mulps %xmm1,%xmm10
10908 102,65,15,114,210,13, //psrld $0xd,%xmm10
10909 102,69,15,111,216, //movdqa %xmm8,%xmm11
10910 68,15,89,218, //mulps %xmm2,%xmm11
10911 102,65,15,114,211,13, //psrld $0xd,%xmm11
10912 68,15,89,195, //mulps %xmm3,%xmm8
10913 102,65,15,114,208,13, //psrld $0xd,%xmm8
10914 102,65,15,115,250,2, //pslldq $0x2,%xmm10
10915 102,69,15,235,209, //por %xmm9,%xmm10
10916 102,65,15,115,248,2, //pslldq $0x2,%xmm8
10917 102,69,15,235,195, //por %xmm11,%xmm8
10918 102,69,15,111,202, //movdqa %xmm10,%xmm9
10919 102,69,15,98,200, //punpckldq %xmm8,%xmm9
10920 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
10921 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
10922 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
10923 72,173, //lods %ds:(%rsi),%rax
10924 255,224, //jmpq *%rax
10925};
10926
10927CODE const uint8_t sk_store_f32_sse41[] = {
10928 72,173, //lods %ds:(%rsi),%rax
10929 72,139,0, //mov (%rax),%rax
10930 72,137,249, //mov %rdi,%rcx
10931 72,193,225,4, //shl $0x4,%rcx
10932 68,15,40,192, //movaps %xmm0,%xmm8
10933 68,15,40,200, //movaps %xmm0,%xmm9
10934 68,15,20,201, //unpcklps %xmm1,%xmm9
10935 68,15,40,210, //movaps %xmm2,%xmm10
10936 68,15,40,218, //movaps %xmm2,%xmm11
10937 68,15,20,219, //unpcklps %xmm3,%xmm11
10938 68,15,21,193, //unpckhps %xmm1,%xmm8
10939 68,15,21,211, //unpckhps %xmm3,%xmm10
10940 69,15,40,225, //movaps %xmm9,%xmm12
10941 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
10942 102,69,15,21,203, //unpckhpd %xmm11,%xmm9
10943 69,15,40,216, //movaps %xmm8,%xmm11
10944 102,69,15,20,218, //unpcklpd %xmm10,%xmm11
10945 102,69,15,21,194, //unpckhpd %xmm10,%xmm8
10946 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
10947 102,68,15,17,76,8,16, //movupd %xmm9,0x10(%rax,%rcx,1)
10948 102,68,15,17,92,8,32, //movupd %xmm11,0x20(%rax,%rcx,1)
10949 102,68,15,17,68,8,48, //movupd %xmm8,0x30(%rax,%rcx,1)
10950 72,173, //lods %ds:(%rsi),%rax
10951 255,224, //jmpq *%rax
10952};
10953
10954CODE const uint8_t sk_clamp_x_sse41[] = {
10955 72,173, //lods %ds:(%rsi),%rax
10956 69,15,87,192, //xorps %xmm8,%xmm8
10957 68,15,95,192, //maxps %xmm0,%xmm8
10958 243,68,15,16,8, //movss (%rax),%xmm9
10959 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
10960 102,15,118,192, //pcmpeqd %xmm0,%xmm0
10961 102,65,15,254,193, //paddd %xmm9,%xmm0
10962 68,15,93,192, //minps %xmm0,%xmm8
10963 72,173, //lods %ds:(%rsi),%rax
10964 65,15,40,192, //movaps %xmm8,%xmm0
10965 255,224, //jmpq *%rax
10966};
10967
10968CODE const uint8_t sk_clamp_y_sse41[] = {
10969 72,173, //lods %ds:(%rsi),%rax
10970 69,15,87,192, //xorps %xmm8,%xmm8
10971 68,15,95,193, //maxps %xmm1,%xmm8
10972 243,68,15,16,8, //movss (%rax),%xmm9
10973 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
10974 102,15,118,201, //pcmpeqd %xmm1,%xmm1
10975 102,65,15,254,201, //paddd %xmm9,%xmm1
10976 68,15,93,193, //minps %xmm1,%xmm8
10977 72,173, //lods %ds:(%rsi),%rax
10978 65,15,40,200, //movaps %xmm8,%xmm1
10979 255,224, //jmpq *%rax
10980};
10981
10982CODE const uint8_t sk_repeat_x_sse41[] = {
10983 72,173, //lods %ds:(%rsi),%rax
10984 243,68,15,16,0, //movss (%rax),%xmm8
10985 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
10986 68,15,40,200, //movaps %xmm0,%xmm9
10987 69,15,94,200, //divps %xmm8,%xmm9
10988 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
10989 69,15,89,200, //mulps %xmm8,%xmm9
10990 65,15,92,193, //subps %xmm9,%xmm0
10991 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
10992 102,69,15,254,200, //paddd %xmm8,%xmm9
10993 65,15,93,193, //minps %xmm9,%xmm0
10994 72,173, //lods %ds:(%rsi),%rax
10995 255,224, //jmpq *%rax
10996};
10997
10998CODE const uint8_t sk_repeat_y_sse41[] = {
10999 72,173, //lods %ds:(%rsi),%rax
11000 243,68,15,16,0, //movss (%rax),%xmm8
11001 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11002 68,15,40,201, //movaps %xmm1,%xmm9
11003 69,15,94,200, //divps %xmm8,%xmm9
11004 102,69,15,58,8,201,1, //roundps $0x1,%xmm9,%xmm9
11005 69,15,89,200, //mulps %xmm8,%xmm9
11006 65,15,92,201, //subps %xmm9,%xmm1
11007 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
11008 102,69,15,254,200, //paddd %xmm8,%xmm9
11009 65,15,93,201, //minps %xmm9,%xmm1
11010 72,173, //lods %ds:(%rsi),%rax
11011 255,224, //jmpq *%rax
11012};
11013
11014CODE const uint8_t sk_mirror_x_sse41[] = {
11015 72,173, //lods %ds:(%rsi),%rax
11016 243,68,15,16,0, //movss (%rax),%xmm8
11017 69,15,40,200, //movaps %xmm8,%xmm9
11018 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11019 65,15,92,193, //subps %xmm9,%xmm0
11020 243,69,15,88,192, //addss %xmm8,%xmm8
11021 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11022 68,15,40,208, //movaps %xmm0,%xmm10
11023 69,15,94,208, //divps %xmm8,%xmm10
11024 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
11025 69,15,89,208, //mulps %xmm8,%xmm10
11026 65,15,92,194, //subps %xmm10,%xmm0
11027 65,15,92,193, //subps %xmm9,%xmm0
11028 69,15,87,192, //xorps %xmm8,%xmm8
11029 68,15,92,192, //subps %xmm0,%xmm8
11030 65,15,84,192, //andps %xmm8,%xmm0
11031 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
11032 102,69,15,254,193, //paddd %xmm9,%xmm8
11033 65,15,93,192, //minps %xmm8,%xmm0
11034 72,173, //lods %ds:(%rsi),%rax
11035 255,224, //jmpq *%rax
11036};
11037
11038CODE const uint8_t sk_mirror_y_sse41[] = {
11039 72,173, //lods %ds:(%rsi),%rax
11040 243,68,15,16,0, //movss (%rax),%xmm8
11041 69,15,40,200, //movaps %xmm8,%xmm9
11042 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11043 65,15,92,201, //subps %xmm9,%xmm1
11044 243,69,15,88,192, //addss %xmm8,%xmm8
11045 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11046 68,15,40,209, //movaps %xmm1,%xmm10
11047 69,15,94,208, //divps %xmm8,%xmm10
11048 102,69,15,58,8,210,1, //roundps $0x1,%xmm10,%xmm10
11049 69,15,89,208, //mulps %xmm8,%xmm10
11050 65,15,92,202, //subps %xmm10,%xmm1
11051 65,15,92,201, //subps %xmm9,%xmm1
11052 69,15,87,192, //xorps %xmm8,%xmm8
11053 68,15,92,193, //subps %xmm1,%xmm8
11054 65,15,84,200, //andps %xmm8,%xmm1
11055 102,69,15,118,192, //pcmpeqd %xmm8,%xmm8
11056 102,69,15,254,193, //paddd %xmm9,%xmm8
11057 65,15,93,200, //minps %xmm8,%xmm1
11058 72,173, //lods %ds:(%rsi),%rax
11059 255,224, //jmpq *%rax
11060};
11061
Mike Kleine9ed07d2017-03-07 12:28:11 -050011062CODE const uint8_t sk_luminance_to_alpha_sse41[] = {
11063 243,15,16,154,136,0,0,0, //movss 0x88(%rdx),%xmm3
11064 243,68,15,16,130,140,0,0,0, //movss 0x8c(%rdx),%xmm8
11065 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11066 15,89,216, //mulps %xmm0,%xmm3
11067 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11068 68,15,89,193, //mulps %xmm1,%xmm8
11069 68,15,88,195, //addps %xmm3,%xmm8
11070 243,15,16,154,144,0,0,0, //movss 0x90(%rdx),%xmm3
11071 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11072 15,89,218, //mulps %xmm2,%xmm3
11073 65,15,88,216, //addps %xmm8,%xmm3
11074 72,173, //lods %ds:(%rsi),%rax
11075 15,87,192, //xorps %xmm0,%xmm0
11076 15,87,201, //xorps %xmm1,%xmm1
11077 15,87,210, //xorps %xmm2,%xmm2
11078 255,224, //jmpq *%rax
11079};
11080
Mike Klein894d5612017-03-07 07:59:52 -050011081CODE const uint8_t sk_matrix_2x3_sse41[] = {
11082 68,15,40,201, //movaps %xmm1,%xmm9
11083 68,15,40,192, //movaps %xmm0,%xmm8
11084 72,173, //lods %ds:(%rsi),%rax
11085 243,15,16,0, //movss (%rax),%xmm0
11086 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11087 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11088 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11089 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11090 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
11091 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11092 69,15,89,209, //mulps %xmm9,%xmm10
11093 69,15,88,211, //addps %xmm11,%xmm10
11094 65,15,89,192, //mulps %xmm8,%xmm0
11095 65,15,88,194, //addps %xmm10,%xmm0
11096 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11097 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
11098 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11099 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
11100 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11101 69,15,89,209, //mulps %xmm9,%xmm10
11102 69,15,88,211, //addps %xmm11,%xmm10
11103 65,15,89,200, //mulps %xmm8,%xmm1
11104 65,15,88,202, //addps %xmm10,%xmm1
11105 72,173, //lods %ds:(%rsi),%rax
11106 255,224, //jmpq *%rax
11107};
11108
11109CODE const uint8_t sk_matrix_3x4_sse41[] = {
11110 68,15,40,201, //movaps %xmm1,%xmm9
11111 68,15,40,192, //movaps %xmm0,%xmm8
11112 72,173, //lods %ds:(%rsi),%rax
11113 243,15,16,0, //movss (%rax),%xmm0
11114 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11115 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11116 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
11117 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11118 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
11119 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11120 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
11121 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11122 68,15,89,218, //mulps %xmm2,%xmm11
11123 69,15,88,220, //addps %xmm12,%xmm11
11124 69,15,89,209, //mulps %xmm9,%xmm10
11125 69,15,88,211, //addps %xmm11,%xmm10
11126 65,15,89,192, //mulps %xmm8,%xmm0
11127 65,15,88,194, //addps %xmm10,%xmm0
11128 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11129 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
11130 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11131 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
11132 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11133 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
11134 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11135 68,15,89,218, //mulps %xmm2,%xmm11
11136 69,15,88,220, //addps %xmm12,%xmm11
11137 69,15,89,209, //mulps %xmm9,%xmm10
11138 69,15,88,211, //addps %xmm11,%xmm10
11139 65,15,89,200, //mulps %xmm8,%xmm1
11140 65,15,88,202, //addps %xmm10,%xmm1
11141 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11142 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11143 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
11144 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11145 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
11146 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11147 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
11148 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11149 68,15,89,226, //mulps %xmm2,%xmm12
11150 69,15,88,229, //addps %xmm13,%xmm12
11151 69,15,89,217, //mulps %xmm9,%xmm11
11152 69,15,88,220, //addps %xmm12,%xmm11
11153 69,15,89,208, //mulps %xmm8,%xmm10
11154 69,15,88,211, //addps %xmm11,%xmm10
11155 72,173, //lods %ds:(%rsi),%rax
11156 65,15,40,210, //movaps %xmm10,%xmm2
11157 255,224, //jmpq *%rax
11158};
11159
Mike Kleine9ed07d2017-03-07 12:28:11 -050011160CODE const uint8_t sk_matrix_4x5_sse41[] = {
11161 68,15,40,201, //movaps %xmm1,%xmm9
11162 68,15,40,192, //movaps %xmm0,%xmm8
11163 72,173, //lods %ds:(%rsi),%rax
11164 243,15,16,0, //movss (%rax),%xmm0
11165 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11166 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11167 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
11168 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11169 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
11170 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11171 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
11172 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11173 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
11174 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11175 68,15,89,227, //mulps %xmm3,%xmm12
11176 69,15,88,229, //addps %xmm13,%xmm12
11177 68,15,89,218, //mulps %xmm2,%xmm11
11178 69,15,88,220, //addps %xmm12,%xmm11
11179 69,15,89,209, //mulps %xmm9,%xmm10
11180 69,15,88,211, //addps %xmm11,%xmm10
11181 65,15,89,192, //mulps %xmm8,%xmm0
11182 65,15,88,194, //addps %xmm10,%xmm0
11183 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11184 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
11185 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11186 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
11187 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11188 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
11189 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11190 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
11191 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11192 68,15,89,227, //mulps %xmm3,%xmm12
11193 69,15,88,229, //addps %xmm13,%xmm12
11194 68,15,89,218, //mulps %xmm2,%xmm11
11195 69,15,88,220, //addps %xmm12,%xmm11
11196 69,15,89,209, //mulps %xmm9,%xmm10
11197 69,15,88,211, //addps %xmm11,%xmm10
11198 65,15,89,200, //mulps %xmm8,%xmm1
11199 65,15,88,202, //addps %xmm10,%xmm1
11200 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11201 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11202 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
11203 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11204 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
11205 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11206 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
11207 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11208 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
11209 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
11210 68,15,89,235, //mulps %xmm3,%xmm13
11211 69,15,88,238, //addps %xmm14,%xmm13
11212 68,15,89,226, //mulps %xmm2,%xmm12
11213 69,15,88,229, //addps %xmm13,%xmm12
11214 69,15,89,217, //mulps %xmm9,%xmm11
11215 69,15,88,220, //addps %xmm12,%xmm11
11216 69,15,89,208, //mulps %xmm8,%xmm10
11217 69,15,88,211, //addps %xmm11,%xmm10
11218 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
11219 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11220 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
11221 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11222 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
11223 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11224 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
11225 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
11226 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
11227 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
11228 68,15,89,243, //mulps %xmm3,%xmm14
11229 69,15,88,247, //addps %xmm15,%xmm14
11230 68,15,89,234, //mulps %xmm2,%xmm13
11231 69,15,88,238, //addps %xmm14,%xmm13
11232 69,15,89,225, //mulps %xmm9,%xmm12
11233 69,15,88,229, //addps %xmm13,%xmm12
11234 69,15,89,216, //mulps %xmm8,%xmm11
11235 69,15,88,220, //addps %xmm12,%xmm11
11236 72,173, //lods %ds:(%rsi),%rax
11237 65,15,40,210, //movaps %xmm10,%xmm2
11238 65,15,40,219, //movaps %xmm11,%xmm3
11239 255,224, //jmpq *%rax
11240};
11241
Mike Klein894d5612017-03-07 07:59:52 -050011242CODE const uint8_t sk_matrix_perspective_sse41[] = {
11243 68,15,40,192, //movaps %xmm0,%xmm8
11244 72,173, //lods %ds:(%rsi),%rax
11245 243,15,16,0, //movss (%rax),%xmm0
11246 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
11247 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11248 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11249 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
11250 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11251 68,15,89,201, //mulps %xmm1,%xmm9
11252 69,15,88,202, //addps %xmm10,%xmm9
11253 65,15,89,192, //mulps %xmm8,%xmm0
11254 65,15,88,193, //addps %xmm9,%xmm0
11255 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
11256 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11257 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
11258 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11259 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
11260 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11261 68,15,89,209, //mulps %xmm1,%xmm10
11262 69,15,88,211, //addps %xmm11,%xmm10
11263 69,15,89,200, //mulps %xmm8,%xmm9
11264 69,15,88,202, //addps %xmm10,%xmm9
11265 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
11266 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11267 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
11268 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11269 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
11270 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11271 68,15,89,217, //mulps %xmm1,%xmm11
11272 69,15,88,220, //addps %xmm12,%xmm11
11273 69,15,89,208, //mulps %xmm8,%xmm10
11274 69,15,88,211, //addps %xmm11,%xmm10
11275 65,15,83,202, //rcpps %xmm10,%xmm1
11276 15,89,193, //mulps %xmm1,%xmm0
11277 68,15,89,201, //mulps %xmm1,%xmm9
11278 72,173, //lods %ds:(%rsi),%rax
11279 65,15,40,201, //movaps %xmm9,%xmm1
11280 255,224, //jmpq *%rax
11281};
11282
11283CODE const uint8_t sk_linear_gradient_2stops_sse41[] = {
11284 72,173, //lods %ds:(%rsi),%rax
11285 68,15,16,8, //movups (%rax),%xmm9
11286 15,16,88,16, //movups 0x10(%rax),%xmm3
11287 68,15,40,195, //movaps %xmm3,%xmm8
11288 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11289 65,15,40,201, //movaps %xmm9,%xmm1
11290 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11291 68,15,89,192, //mulps %xmm0,%xmm8
11292 68,15,88,193, //addps %xmm1,%xmm8
11293 15,40,203, //movaps %xmm3,%xmm1
11294 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
11295 65,15,40,209, //movaps %xmm9,%xmm2
11296 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
11297 15,89,200, //mulps %xmm0,%xmm1
11298 15,88,202, //addps %xmm2,%xmm1
11299 15,40,211, //movaps %xmm3,%xmm2
11300 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
11301 69,15,40,209, //movaps %xmm9,%xmm10
11302 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
11303 15,89,208, //mulps %xmm0,%xmm2
11304 65,15,88,210, //addps %xmm10,%xmm2
11305 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
11306 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
11307 15,89,216, //mulps %xmm0,%xmm3
11308 65,15,88,217, //addps %xmm9,%xmm3
11309 72,173, //lods %ds:(%rsi),%rax
11310 65,15,40,192, //movaps %xmm8,%xmm0
11311 255,224, //jmpq *%rax
11312};
11313
11314CODE const uint8_t sk_start_pipeline_sse2[] = {
11315 65,87, //push %r15
11316 65,86, //push %r14
11317 65,85, //push %r13
11318 65,84, //push %r12
11319 86, //push %rsi
11320 87, //push %rdi
11321 83, //push %rbx
11322 72,129,236,160,0,0,0, //sub $0xa0,%rsp
11323 68,15,41,188,36,144,0,0,0, //movaps %xmm15,0x90(%rsp)
11324 68,15,41,180,36,128,0,0,0, //movaps %xmm14,0x80(%rsp)
11325 68,15,41,108,36,112, //movaps %xmm13,0x70(%rsp)
11326 68,15,41,100,36,96, //movaps %xmm12,0x60(%rsp)
11327 68,15,41,92,36,80, //movaps %xmm11,0x50(%rsp)
11328 68,15,41,84,36,64, //movaps %xmm10,0x40(%rsp)
11329 68,15,41,76,36,48, //movaps %xmm9,0x30(%rsp)
11330 68,15,41,68,36,32, //movaps %xmm8,0x20(%rsp)
11331 15,41,124,36,16, //movaps %xmm7,0x10(%rsp)
11332 15,41,52,36, //movaps %xmm6,(%rsp)
11333 77,137,207, //mov %r9,%r15
11334 77,137,198, //mov %r8,%r14
11335 72,137,203, //mov %rcx,%rbx
11336 72,137,214, //mov %rdx,%rsi
11337 72,173, //lods %ds:(%rsi),%rax
11338 73,137,196, //mov %rax,%r12
11339 73,137,245, //mov %rsi,%r13
11340 72,141,67,4, //lea 0x4(%rbx),%rax
11341 76,57,248, //cmp %r15,%rax
11342 118,5, //jbe 73 <_sk_start_pipeline_sse2+0x73>
11343 72,137,216, //mov %rbx,%rax
11344 235,52, //jmp a7 <_sk_start_pipeline_sse2+0xa7>
11345 15,87,192, //xorps %xmm0,%xmm0
11346 15,87,201, //xorps %xmm1,%xmm1
11347 15,87,210, //xorps %xmm2,%xmm2
11348 15,87,219, //xorps %xmm3,%xmm3
11349 15,87,228, //xorps %xmm4,%xmm4
11350 15,87,237, //xorps %xmm5,%xmm5
11351 15,87,246, //xorps %xmm6,%xmm6
11352 15,87,255, //xorps %xmm7,%xmm7
11353 72,137,223, //mov %rbx,%rdi
11354 76,137,238, //mov %r13,%rsi
11355 76,137,242, //mov %r14,%rdx
11356 65,255,212, //callq *%r12
11357 72,141,67,4, //lea 0x4(%rbx),%rax
11358 72,131,195,8, //add $0x8,%rbx
11359 76,57,251, //cmp %r15,%rbx
11360 72,137,195, //mov %rax,%rbx
11361 118,204, //jbe 73 <_sk_start_pipeline_sse2+0x73>
11362 15,40,52,36, //movaps (%rsp),%xmm6
11363 15,40,124,36,16, //movaps 0x10(%rsp),%xmm7
11364 68,15,40,68,36,32, //movaps 0x20(%rsp),%xmm8
11365 68,15,40,76,36,48, //movaps 0x30(%rsp),%xmm9
11366 68,15,40,84,36,64, //movaps 0x40(%rsp),%xmm10
11367 68,15,40,92,36,80, //movaps 0x50(%rsp),%xmm11
11368 68,15,40,100,36,96, //movaps 0x60(%rsp),%xmm12
11369 68,15,40,108,36,112, //movaps 0x70(%rsp),%xmm13
11370 68,15,40,180,36,128,0,0,0, //movaps 0x80(%rsp),%xmm14
11371 68,15,40,188,36,144,0,0,0, //movaps 0x90(%rsp),%xmm15
11372 72,129,196,160,0,0,0, //add $0xa0,%rsp
11373 91, //pop %rbx
11374 95, //pop %rdi
11375 94, //pop %rsi
11376 65,92, //pop %r12
11377 65,93, //pop %r13
11378 65,94, //pop %r14
11379 65,95, //pop %r15
11380 195, //retq
11381};
11382
11383CODE const uint8_t sk_just_return_sse2[] = {
11384 195, //retq
11385};
11386
11387CODE const uint8_t sk_seed_shader_sse2[] = {
11388 72,173, //lods %ds:(%rsi),%rax
11389 102,15,110,199, //movd %edi,%xmm0
11390 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
11391 15,91,200, //cvtdq2ps %xmm0,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011392 185,0,0,0,63, //mov $0x3f000000,%ecx
11393 102,15,110,209, //movd %ecx,%xmm2
11394 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
11395 15,88,202, //addps %xmm2,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050011396 15,16,66,20, //movups 0x14(%rdx),%xmm0
11397 15,88,193, //addps %xmm1,%xmm0
11398 102,15,110,8, //movd (%rax),%xmm1
11399 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
11400 15,91,201, //cvtdq2ps %xmm1,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011401 15,88,202, //addps %xmm2,%xmm1
11402 184,0,0,128,63, //mov $0x3f800000,%eax
11403 102,15,110,208, //movd %eax,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050011404 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
11405 72,173, //lods %ds:(%rsi),%rax
11406 15,87,219, //xorps %xmm3,%xmm3
11407 15,87,228, //xorps %xmm4,%xmm4
11408 15,87,237, //xorps %xmm5,%xmm5
11409 15,87,246, //xorps %xmm6,%xmm6
11410 15,87,255, //xorps %xmm7,%xmm7
11411 255,224, //jmpq *%rax
11412};
11413
11414CODE const uint8_t sk_constant_color_sse2[] = {
11415 72,173, //lods %ds:(%rsi),%rax
11416 15,16,24, //movups (%rax),%xmm3
11417 15,40,195, //movaps %xmm3,%xmm0
11418 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11419 15,40,203, //movaps %xmm3,%xmm1
11420 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
11421 15,40,211, //movaps %xmm3,%xmm2
11422 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
11423 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
11424 72,173, //lods %ds:(%rsi),%rax
11425 255,224, //jmpq *%rax
11426};
11427
11428CODE const uint8_t sk_clear_sse2[] = {
11429 72,173, //lods %ds:(%rsi),%rax
11430 15,87,192, //xorps %xmm0,%xmm0
11431 15,87,201, //xorps %xmm1,%xmm1
11432 15,87,210, //xorps %xmm2,%xmm2
11433 15,87,219, //xorps %xmm3,%xmm3
11434 255,224, //jmpq *%rax
11435};
11436
11437CODE const uint8_t sk_plus__sse2[] = {
11438 15,88,196, //addps %xmm4,%xmm0
11439 15,88,205, //addps %xmm5,%xmm1
11440 15,88,214, //addps %xmm6,%xmm2
11441 15,88,223, //addps %xmm7,%xmm3
11442 72,173, //lods %ds:(%rsi),%rax
11443 255,224, //jmpq *%rax
11444};
11445
11446CODE const uint8_t sk_srcover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011447 184,0,0,128,63, //mov $0x3f800000,%eax
11448 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011449 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11450 68,15,92,195, //subps %xmm3,%xmm8
11451 69,15,40,200, //movaps %xmm8,%xmm9
11452 68,15,89,204, //mulps %xmm4,%xmm9
11453 65,15,88,193, //addps %xmm9,%xmm0
11454 69,15,40,200, //movaps %xmm8,%xmm9
11455 68,15,89,205, //mulps %xmm5,%xmm9
11456 65,15,88,201, //addps %xmm9,%xmm1
11457 69,15,40,200, //movaps %xmm8,%xmm9
11458 68,15,89,206, //mulps %xmm6,%xmm9
11459 65,15,88,209, //addps %xmm9,%xmm2
11460 68,15,89,199, //mulps %xmm7,%xmm8
11461 65,15,88,216, //addps %xmm8,%xmm3
11462 72,173, //lods %ds:(%rsi),%rax
11463 255,224, //jmpq *%rax
11464};
11465
11466CODE const uint8_t sk_dstover_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011467 184,0,0,128,63, //mov $0x3f800000,%eax
11468 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011469 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11470 68,15,92,199, //subps %xmm7,%xmm8
11471 65,15,89,192, //mulps %xmm8,%xmm0
11472 15,88,196, //addps %xmm4,%xmm0
11473 65,15,89,200, //mulps %xmm8,%xmm1
11474 15,88,205, //addps %xmm5,%xmm1
11475 65,15,89,208, //mulps %xmm8,%xmm2
11476 15,88,214, //addps %xmm6,%xmm2
11477 65,15,89,216, //mulps %xmm8,%xmm3
11478 15,88,223, //addps %xmm7,%xmm3
11479 72,173, //lods %ds:(%rsi),%rax
11480 255,224, //jmpq *%rax
11481};
11482
11483CODE const uint8_t sk_clamp_0_sse2[] = {
11484 69,15,87,192, //xorps %xmm8,%xmm8
11485 65,15,95,192, //maxps %xmm8,%xmm0
11486 65,15,95,200, //maxps %xmm8,%xmm1
11487 65,15,95,208, //maxps %xmm8,%xmm2
11488 65,15,95,216, //maxps %xmm8,%xmm3
11489 72,173, //lods %ds:(%rsi),%rax
11490 255,224, //jmpq *%rax
11491};
11492
11493CODE const uint8_t sk_clamp_1_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011494 184,0,0,128,63, //mov $0x3f800000,%eax
11495 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011496 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11497 65,15,93,192, //minps %xmm8,%xmm0
11498 65,15,93,200, //minps %xmm8,%xmm1
11499 65,15,93,208, //minps %xmm8,%xmm2
11500 65,15,93,216, //minps %xmm8,%xmm3
11501 72,173, //lods %ds:(%rsi),%rax
11502 255,224, //jmpq *%rax
11503};
11504
11505CODE const uint8_t sk_clamp_a_sse2[] = {
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011506 184,0,0,128,63, //mov $0x3f800000,%eax
11507 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011508 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11509 65,15,93,216, //minps %xmm8,%xmm3
11510 15,93,195, //minps %xmm3,%xmm0
11511 15,93,203, //minps %xmm3,%xmm1
11512 15,93,211, //minps %xmm3,%xmm2
11513 72,173, //lods %ds:(%rsi),%rax
11514 255,224, //jmpq *%rax
11515};
11516
11517CODE const uint8_t sk_set_rgb_sse2[] = {
11518 72,173, //lods %ds:(%rsi),%rax
11519 243,15,16,0, //movss (%rax),%xmm0
11520 243,15,16,72,4, //movss 0x4(%rax),%xmm1
11521 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11522 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11523 243,15,16,80,8, //movss 0x8(%rax),%xmm2
11524 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
11525 72,173, //lods %ds:(%rsi),%rax
11526 255,224, //jmpq *%rax
11527};
11528
11529CODE const uint8_t sk_swap_rb_sse2[] = {
11530 68,15,40,192, //movaps %xmm0,%xmm8
11531 72,173, //lods %ds:(%rsi),%rax
11532 15,40,194, //movaps %xmm2,%xmm0
11533 65,15,40,208, //movaps %xmm8,%xmm2
11534 255,224, //jmpq *%rax
11535};
11536
11537CODE const uint8_t sk_swap_sse2[] = {
11538 68,15,40,195, //movaps %xmm3,%xmm8
11539 68,15,40,202, //movaps %xmm2,%xmm9
11540 68,15,40,209, //movaps %xmm1,%xmm10
11541 68,15,40,216, //movaps %xmm0,%xmm11
11542 72,173, //lods %ds:(%rsi),%rax
11543 15,40,196, //movaps %xmm4,%xmm0
11544 15,40,205, //movaps %xmm5,%xmm1
11545 15,40,214, //movaps %xmm6,%xmm2
11546 15,40,223, //movaps %xmm7,%xmm3
11547 65,15,40,227, //movaps %xmm11,%xmm4
11548 65,15,40,234, //movaps %xmm10,%xmm5
11549 65,15,40,241, //movaps %xmm9,%xmm6
11550 65,15,40,248, //movaps %xmm8,%xmm7
11551 255,224, //jmpq *%rax
11552};
11553
11554CODE const uint8_t sk_move_src_dst_sse2[] = {
11555 72,173, //lods %ds:(%rsi),%rax
11556 15,40,224, //movaps %xmm0,%xmm4
11557 15,40,233, //movaps %xmm1,%xmm5
11558 15,40,242, //movaps %xmm2,%xmm6
11559 15,40,251, //movaps %xmm3,%xmm7
11560 255,224, //jmpq *%rax
11561};
11562
11563CODE const uint8_t sk_move_dst_src_sse2[] = {
11564 72,173, //lods %ds:(%rsi),%rax
11565 15,40,196, //movaps %xmm4,%xmm0
11566 15,40,205, //movaps %xmm5,%xmm1
11567 15,40,214, //movaps %xmm6,%xmm2
11568 15,40,223, //movaps %xmm7,%xmm3
11569 255,224, //jmpq *%rax
11570};
11571
11572CODE const uint8_t sk_premul_sse2[] = {
11573 15,89,195, //mulps %xmm3,%xmm0
11574 15,89,203, //mulps %xmm3,%xmm1
11575 15,89,211, //mulps %xmm3,%xmm2
11576 72,173, //lods %ds:(%rsi),%rax
11577 255,224, //jmpq *%rax
11578};
11579
11580CODE const uint8_t sk_unpremul_sse2[] = {
11581 69,15,87,192, //xorps %xmm8,%xmm8
11582 68,15,194,195,0, //cmpeqps %xmm3,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011583 184,0,0,128,63, //mov $0x3f800000,%eax
11584 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011585 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11586 68,15,94,203, //divps %xmm3,%xmm9
11587 69,15,85,193, //andnps %xmm9,%xmm8
11588 65,15,89,192, //mulps %xmm8,%xmm0
11589 65,15,89,200, //mulps %xmm8,%xmm1
11590 65,15,89,208, //mulps %xmm8,%xmm2
11591 72,173, //lods %ds:(%rsi),%rax
11592 255,224, //jmpq *%rax
11593};
11594
11595CODE const uint8_t sk_from_srgb_sse2[] = {
11596 243,68,15,16,66,64, //movss 0x40(%rdx),%xmm8
11597 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11598 69,15,40,232, //movaps %xmm8,%xmm13
11599 68,15,89,232, //mulps %xmm0,%xmm13
11600 68,15,40,224, //movaps %xmm0,%xmm12
11601 69,15,89,228, //mulps %xmm12,%xmm12
11602 243,68,15,16,74,60, //movss 0x3c(%rdx),%xmm9
11603 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11604 243,68,15,16,82,52, //movss 0x34(%rdx),%xmm10
11605 243,68,15,16,90,56, //movss 0x38(%rdx),%xmm11
11606 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11607 69,15,40,241, //movaps %xmm9,%xmm14
11608 68,15,89,240, //mulps %xmm0,%xmm14
11609 69,15,88,243, //addps %xmm11,%xmm14
11610 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11611 69,15,89,244, //mulps %xmm12,%xmm14
11612 69,15,88,242, //addps %xmm10,%xmm14
11613 243,68,15,16,98,68, //movss 0x44(%rdx),%xmm12
11614 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11615 65,15,194,196,1, //cmpltps %xmm12,%xmm0
11616 68,15,84,232, //andps %xmm0,%xmm13
11617 65,15,85,198, //andnps %xmm14,%xmm0
11618 65,15,86,197, //orps %xmm13,%xmm0
11619 69,15,40,232, //movaps %xmm8,%xmm13
11620 68,15,89,233, //mulps %xmm1,%xmm13
11621 68,15,40,241, //movaps %xmm1,%xmm14
11622 69,15,89,246, //mulps %xmm14,%xmm14
11623 69,15,40,249, //movaps %xmm9,%xmm15
11624 68,15,89,249, //mulps %xmm1,%xmm15
11625 69,15,88,251, //addps %xmm11,%xmm15
11626 69,15,89,254, //mulps %xmm14,%xmm15
11627 69,15,88,250, //addps %xmm10,%xmm15
11628 65,15,194,204,1, //cmpltps %xmm12,%xmm1
11629 68,15,84,233, //andps %xmm1,%xmm13
11630 65,15,85,207, //andnps %xmm15,%xmm1
11631 65,15,86,205, //orps %xmm13,%xmm1
11632 68,15,89,194, //mulps %xmm2,%xmm8
11633 68,15,40,234, //movaps %xmm2,%xmm13
11634 69,15,89,237, //mulps %xmm13,%xmm13
11635 68,15,89,202, //mulps %xmm2,%xmm9
11636 69,15,88,203, //addps %xmm11,%xmm9
11637 69,15,89,205, //mulps %xmm13,%xmm9
11638 69,15,88,202, //addps %xmm10,%xmm9
11639 65,15,194,212,1, //cmpltps %xmm12,%xmm2
11640 68,15,84,194, //andps %xmm2,%xmm8
11641 65,15,85,209, //andnps %xmm9,%xmm2
11642 65,15,86,208, //orps %xmm8,%xmm2
11643 72,173, //lods %ds:(%rsi),%rax
11644 255,224, //jmpq *%rax
11645};
11646
11647CODE const uint8_t sk_to_srgb_sse2[] = {
11648 72,131,236,40, //sub $0x28,%rsp
11649 15,41,124,36,16, //movaps %xmm7,0x10(%rsp)
11650 15,41,52,36, //movaps %xmm6,(%rsp)
11651 15,40,245, //movaps %xmm5,%xmm6
11652 15,40,236, //movaps %xmm4,%xmm5
11653 15,40,227, //movaps %xmm3,%xmm4
11654 68,15,82,192, //rsqrtps %xmm0,%xmm8
11655 69,15,83,232, //rcpps %xmm8,%xmm13
11656 69,15,82,248, //rsqrtps %xmm8,%xmm15
11657 243,15,16,26, //movss (%rdx),%xmm3
11658 243,68,15,16,66,72, //movss 0x48(%rdx),%xmm8
11659 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11660 69,15,40,240, //movaps %xmm8,%xmm14
11661 68,15,89,240, //mulps %xmm0,%xmm14
11662 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11663 243,68,15,16,82,76, //movss 0x4c(%rdx),%xmm10
11664 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
11665 243,68,15,16,90,80, //movss 0x50(%rdx),%xmm11
11666 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
11667 243,68,15,16,98,84, //movss 0x54(%rdx),%xmm12
11668 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
11669 69,15,89,235, //mulps %xmm11,%xmm13
11670 69,15,88,236, //addps %xmm12,%xmm13
11671 69,15,89,250, //mulps %xmm10,%xmm15
11672 69,15,88,253, //addps %xmm13,%xmm15
11673 68,15,40,203, //movaps %xmm3,%xmm9
11674 69,15,93,207, //minps %xmm15,%xmm9
11675 243,68,15,16,106,88, //movss 0x58(%rdx),%xmm13
11676 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
11677 65,15,194,197,1, //cmpltps %xmm13,%xmm0
11678 68,15,84,240, //andps %xmm0,%xmm14
11679 65,15,85,193, //andnps %xmm9,%xmm0
11680 65,15,86,198, //orps %xmm14,%xmm0
11681 68,15,82,201, //rsqrtps %xmm1,%xmm9
11682 69,15,83,241, //rcpps %xmm9,%xmm14
11683 69,15,82,201, //rsqrtps %xmm9,%xmm9
11684 69,15,89,243, //mulps %xmm11,%xmm14
11685 69,15,88,244, //addps %xmm12,%xmm14
11686 69,15,89,202, //mulps %xmm10,%xmm9
11687 69,15,88,206, //addps %xmm14,%xmm9
11688 68,15,40,243, //movaps %xmm3,%xmm14
11689 69,15,93,241, //minps %xmm9,%xmm14
11690 69,15,40,200, //movaps %xmm8,%xmm9
11691 68,15,89,201, //mulps %xmm1,%xmm9
11692 65,15,194,205,1, //cmpltps %xmm13,%xmm1
11693 68,15,84,201, //andps %xmm1,%xmm9
11694 65,15,85,206, //andnps %xmm14,%xmm1
11695 65,15,86,201, //orps %xmm9,%xmm1
11696 68,15,82,202, //rsqrtps %xmm2,%xmm9
11697 69,15,83,241, //rcpps %xmm9,%xmm14
11698 69,15,89,243, //mulps %xmm11,%xmm14
11699 69,15,88,244, //addps %xmm12,%xmm14
11700 65,15,82,249, //rsqrtps %xmm9,%xmm7
11701 65,15,89,250, //mulps %xmm10,%xmm7
11702 65,15,88,254, //addps %xmm14,%xmm7
11703 15,93,223, //minps %xmm7,%xmm3
11704 68,15,89,194, //mulps %xmm2,%xmm8
11705 65,15,194,213,1, //cmpltps %xmm13,%xmm2
11706 68,15,84,194, //andps %xmm2,%xmm8
11707 15,85,211, //andnps %xmm3,%xmm2
11708 65,15,86,208, //orps %xmm8,%xmm2
11709 72,173, //lods %ds:(%rsi),%rax
11710 15,40,220, //movaps %xmm4,%xmm3
11711 15,40,229, //movaps %xmm5,%xmm4
11712 15,40,238, //movaps %xmm6,%xmm5
11713 15,40,52,36, //movaps (%rsp),%xmm6
11714 15,40,124,36,16, //movaps 0x10(%rsp),%xmm7
11715 72,131,196,40, //add $0x28,%rsp
11716 255,224, //jmpq *%rax
11717};
11718
11719CODE const uint8_t sk_scale_1_float_sse2[] = {
11720 72,173, //lods %ds:(%rsi),%rax
11721 243,68,15,16,0, //movss (%rax),%xmm8
11722 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11723 65,15,89,192, //mulps %xmm8,%xmm0
11724 65,15,89,200, //mulps %xmm8,%xmm1
11725 65,15,89,208, //mulps %xmm8,%xmm2
11726 65,15,89,216, //mulps %xmm8,%xmm3
11727 72,173, //lods %ds:(%rsi),%rax
11728 255,224, //jmpq *%rax
11729};
11730
11731CODE const uint8_t sk_scale_u8_sse2[] = {
11732 72,173, //lods %ds:(%rsi),%rax
11733 72,139,0, //mov (%rax),%rax
11734 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
11735 102,69,15,239,201, //pxor %xmm9,%xmm9
11736 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
11737 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
11738 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011739 184,129,128,128,59, //mov $0x3b808081,%eax
11740 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011741 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11742 69,15,89,200, //mulps %xmm8,%xmm9
11743 65,15,89,193, //mulps %xmm9,%xmm0
11744 65,15,89,201, //mulps %xmm9,%xmm1
11745 65,15,89,209, //mulps %xmm9,%xmm2
11746 65,15,89,217, //mulps %xmm9,%xmm3
11747 72,173, //lods %ds:(%rsi),%rax
11748 255,224, //jmpq *%rax
11749};
11750
11751CODE const uint8_t sk_lerp_1_float_sse2[] = {
11752 72,173, //lods %ds:(%rsi),%rax
11753 243,68,15,16,0, //movss (%rax),%xmm8
11754 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11755 15,92,196, //subps %xmm4,%xmm0
11756 65,15,89,192, //mulps %xmm8,%xmm0
11757 15,88,196, //addps %xmm4,%xmm0
11758 15,92,205, //subps %xmm5,%xmm1
11759 65,15,89,200, //mulps %xmm8,%xmm1
11760 15,88,205, //addps %xmm5,%xmm1
11761 15,92,214, //subps %xmm6,%xmm2
11762 65,15,89,208, //mulps %xmm8,%xmm2
11763 15,88,214, //addps %xmm6,%xmm2
11764 15,92,223, //subps %xmm7,%xmm3
11765 65,15,89,216, //mulps %xmm8,%xmm3
11766 15,88,223, //addps %xmm7,%xmm3
11767 72,173, //lods %ds:(%rsi),%rax
11768 255,224, //jmpq *%rax
11769};
11770
11771CODE const uint8_t sk_lerp_u8_sse2[] = {
11772 72,173, //lods %ds:(%rsi),%rax
11773 72,139,0, //mov (%rax),%rax
11774 102,68,15,110,4,56, //movd (%rax,%rdi,1),%xmm8
11775 102,69,15,239,201, //pxor %xmm9,%xmm9
11776 102,69,15,96,193, //punpcklbw %xmm9,%xmm8
11777 102,69,15,97,193, //punpcklwd %xmm9,%xmm8
11778 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011779 184,129,128,128,59, //mov $0x3b808081,%eax
11780 102,68,15,110,200, //movd %eax,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011781 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11782 69,15,89,200, //mulps %xmm8,%xmm9
11783 15,92,196, //subps %xmm4,%xmm0
11784 65,15,89,193, //mulps %xmm9,%xmm0
11785 15,88,196, //addps %xmm4,%xmm0
11786 15,92,205, //subps %xmm5,%xmm1
11787 65,15,89,201, //mulps %xmm9,%xmm1
11788 15,88,205, //addps %xmm5,%xmm1
11789 15,92,214, //subps %xmm6,%xmm2
11790 65,15,89,209, //mulps %xmm9,%xmm2
11791 15,88,214, //addps %xmm6,%xmm2
11792 15,92,223, //subps %xmm7,%xmm3
11793 65,15,89,217, //mulps %xmm9,%xmm3
11794 15,88,223, //addps %xmm7,%xmm3
11795 72,173, //lods %ds:(%rsi),%rax
11796 255,224, //jmpq *%rax
11797};
11798
11799CODE const uint8_t sk_lerp_565_sse2[] = {
11800 72,173, //lods %ds:(%rsi),%rax
11801 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011802 243,68,15,126,12,120, //movq (%rax,%rdi,2),%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011803 102,15,239,219, //pxor %xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011804 102,68,15,97,203, //punpcklwd %xmm3,%xmm9
Mike Klein894d5612017-03-07 07:59:52 -050011805 102,15,110,90,104, //movd 0x68(%rdx),%xmm3
11806 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011807 102,65,15,219,217, //pand %xmm9,%xmm3
11808 68,15,91,211, //cvtdq2ps %xmm3,%xmm10
11809 243,68,15,16,90,116, //movss 0x74(%rdx),%xmm11
11810 243,68,15,16,66,120, //movss 0x78(%rdx),%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050011811 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011812 69,15,89,218, //mulps %xmm10,%xmm11
11813 102,15,110,90,108, //movd 0x6c(%rdx),%xmm3
11814 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
11815 102,65,15,219,217, //pand %xmm9,%xmm3
11816 15,91,219, //cvtdq2ps %xmm3,%xmm3
11817 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11818 68,15,89,195, //mulps %xmm3,%xmm8
11819 102,15,110,90,112, //movd 0x70(%rdx),%xmm3
11820 102,15,112,219,0, //pshufd $0x0,%xmm3,%xmm3
11821 102,65,15,219,217, //pand %xmm9,%xmm3
11822 68,15,91,203, //cvtdq2ps %xmm3,%xmm9
11823 243,15,16,90,124, //movss 0x7c(%rdx),%xmm3
11824 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11825 65,15,89,217, //mulps %xmm9,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011826 15,92,196, //subps %xmm4,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011827 65,15,89,195, //mulps %xmm11,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050011828 15,88,196, //addps %xmm4,%xmm0
11829 15,92,205, //subps %xmm5,%xmm1
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011830 65,15,89,200, //mulps %xmm8,%xmm1
Mike Klein894d5612017-03-07 07:59:52 -050011831 15,88,205, //addps %xmm5,%xmm1
11832 15,92,214, //subps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011833 15,89,211, //mulps %xmm3,%xmm2
Mike Klein894d5612017-03-07 07:59:52 -050011834 15,88,214, //addps %xmm6,%xmm2
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050011835 184,0,0,128,63, //mov $0x3f800000,%eax
11836 102,15,110,216, //movd %eax,%xmm3
Mike Klein894d5612017-03-07 07:59:52 -050011837 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11838 72,173, //lods %ds:(%rsi),%rax
11839 255,224, //jmpq *%rax
11840};
11841
11842CODE const uint8_t sk_load_tables_sse2[] = {
11843 72,173, //lods %ds:(%rsi),%rax
11844 72,139,8, //mov (%rax),%rcx
11845 76,139,64,8, //mov 0x8(%rax),%r8
11846 243,68,15,111,4,185, //movdqu (%rcx,%rdi,4),%xmm8
11847 102,15,110,66,16, //movd 0x10(%rdx),%xmm0
11848 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
11849 102,69,15,111,200, //movdqa %xmm8,%xmm9
11850 102,65,15,114,209,8, //psrld $0x8,%xmm9
11851 102,68,15,219,200, //pand %xmm0,%xmm9
11852 102,69,15,111,208, //movdqa %xmm8,%xmm10
11853 102,65,15,114,210,16, //psrld $0x10,%xmm10
11854 102,68,15,219,208, //pand %xmm0,%xmm10
11855 102,65,15,219,192, //pand %xmm8,%xmm0
11856 102,15,112,216,78, //pshufd $0x4e,%xmm0,%xmm3
11857 102,72,15,126,217, //movq %xmm3,%rcx
11858 65,137,201, //mov %ecx,%r9d
11859 72,193,233,32, //shr $0x20,%rcx
11860 102,73,15,126,194, //movq %xmm0,%r10
11861 69,137,211, //mov %r10d,%r11d
11862 73,193,234,32, //shr $0x20,%r10
11863 243,67,15,16,28,144, //movss (%r8,%r10,4),%xmm3
11864 243,65,15,16,4,136, //movss (%r8,%rcx,4),%xmm0
11865 15,20,216, //unpcklps %xmm0,%xmm3
11866 243,67,15,16,4,152, //movss (%r8,%r11,4),%xmm0
11867 243,67,15,16,12,136, //movss (%r8,%r9,4),%xmm1
11868 15,20,193, //unpcklps %xmm1,%xmm0
11869 15,20,195, //unpcklps %xmm3,%xmm0
11870 72,139,72,16, //mov 0x10(%rax),%rcx
11871 102,65,15,112,201,78, //pshufd $0x4e,%xmm9,%xmm1
11872 102,73,15,126,200, //movq %xmm1,%r8
11873 69,137,193, //mov %r8d,%r9d
11874 73,193,232,32, //shr $0x20,%r8
11875 102,77,15,126,202, //movq %xmm9,%r10
11876 69,137,211, //mov %r10d,%r11d
11877 73,193,234,32, //shr $0x20,%r10
11878 243,66,15,16,28,145, //movss (%rcx,%r10,4),%xmm3
11879 243,66,15,16,12,129, //movss (%rcx,%r8,4),%xmm1
11880 15,20,217, //unpcklps %xmm1,%xmm3
11881 243,66,15,16,12,153, //movss (%rcx,%r11,4),%xmm1
11882 243,66,15,16,20,137, //movss (%rcx,%r9,4),%xmm2
11883 15,20,202, //unpcklps %xmm2,%xmm1
11884 15,20,203, //unpcklps %xmm3,%xmm1
11885 72,139,64,24, //mov 0x18(%rax),%rax
11886 102,65,15,112,210,78, //pshufd $0x4e,%xmm10,%xmm2
11887 102,72,15,126,209, //movq %xmm2,%rcx
11888 65,137,200, //mov %ecx,%r8d
11889 72,193,233,32, //shr $0x20,%rcx
11890 102,77,15,126,209, //movq %xmm10,%r9
11891 69,137,202, //mov %r9d,%r10d
11892 73,193,233,32, //shr $0x20,%r9
11893 243,70,15,16,12,136, //movss (%rax,%r9,4),%xmm9
11894 243,15,16,20,136, //movss (%rax,%rcx,4),%xmm2
11895 68,15,20,202, //unpcklps %xmm2,%xmm9
11896 243,66,15,16,20,144, //movss (%rax,%r10,4),%xmm2
11897 243,66,15,16,28,128, //movss (%rax,%r8,4),%xmm3
11898 15,20,211, //unpcklps %xmm3,%xmm2
11899 65,15,20,209, //unpcklps %xmm9,%xmm2
11900 102,65,15,114,208,24, //psrld $0x18,%xmm8
11901 69,15,91,192, //cvtdq2ps %xmm8,%xmm8
11902 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
11903 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11904 65,15,89,216, //mulps %xmm8,%xmm3
11905 72,173, //lods %ds:(%rsi),%rax
11906 255,224, //jmpq *%rax
11907};
11908
11909CODE const uint8_t sk_load_a8_sse2[] = {
11910 72,173, //lods %ds:(%rsi),%rax
11911 72,139,0, //mov (%rax),%rax
11912 102,15,110,4,56, //movd (%rax,%rdi,1),%xmm0
11913 102,15,239,201, //pxor %xmm1,%xmm1
11914 102,15,96,193, //punpcklbw %xmm1,%xmm0
11915 102,15,97,193, //punpcklwd %xmm1,%xmm0
11916 15,91,192, //cvtdq2ps %xmm0,%xmm0
11917 243,15,16,90,12, //movss 0xc(%rdx),%xmm3
11918 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11919 15,89,216, //mulps %xmm0,%xmm3
11920 72,173, //lods %ds:(%rsi),%rax
11921 15,87,192, //xorps %xmm0,%xmm0
11922 102,15,239,201, //pxor %xmm1,%xmm1
11923 15,87,210, //xorps %xmm2,%xmm2
11924 255,224, //jmpq *%rax
11925};
11926
11927CODE const uint8_t sk_store_a8_sse2[] = {
11928 72,173, //lods %ds:(%rsi),%rax
11929 72,139,0, //mov (%rax),%rax
11930 243,68,15,16,66,8, //movss 0x8(%rdx),%xmm8
11931 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11932 68,15,89,195, //mulps %xmm3,%xmm8
11933 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
11934 102,65,15,114,240,16, //pslld $0x10,%xmm8
11935 102,65,15,114,224,16, //psrad $0x10,%xmm8
11936 102,69,15,107,192, //packssdw %xmm8,%xmm8
11937 102,69,15,103,192, //packuswb %xmm8,%xmm8
11938 102,68,15,126,4,56, //movd %xmm8,(%rax,%rdi,1)
11939 72,173, //lods %ds:(%rsi),%rax
11940 255,224, //jmpq *%rax
11941};
11942
11943CODE const uint8_t sk_load_565_sse2[] = {
11944 72,173, //lods %ds:(%rsi),%rax
11945 72,139,0, //mov (%rax),%rax
11946 243,68,15,126,12,120, //movq (%rax,%rdi,2),%xmm9
11947 102,15,239,192, //pxor %xmm0,%xmm0
11948 102,68,15,97,200, //punpcklwd %xmm0,%xmm9
11949 102,15,110,66,104, //movd 0x68(%rdx),%xmm0
11950 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
11951 102,65,15,219,193, //pand %xmm9,%xmm0
11952 15,91,200, //cvtdq2ps %xmm0,%xmm1
11953 243,15,16,26, //movss (%rdx),%xmm3
11954 243,15,16,66,116, //movss 0x74(%rdx),%xmm0
11955 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
11956 15,89,193, //mulps %xmm1,%xmm0
11957 102,15,110,74,108, //movd 0x6c(%rdx),%xmm1
11958 102,15,112,201,0, //pshufd $0x0,%xmm1,%xmm1
11959 102,65,15,219,201, //pand %xmm9,%xmm1
11960 68,15,91,193, //cvtdq2ps %xmm1,%xmm8
11961 243,15,16,74,120, //movss 0x78(%rdx),%xmm1
11962 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
11963 65,15,89,200, //mulps %xmm8,%xmm1
11964 102,15,110,82,112, //movd 0x70(%rdx),%xmm2
11965 102,15,112,210,0, //pshufd $0x0,%xmm2,%xmm2
11966 102,65,15,219,209, //pand %xmm9,%xmm2
11967 68,15,91,194, //cvtdq2ps %xmm2,%xmm8
11968 243,15,16,82,124, //movss 0x7c(%rdx),%xmm2
11969 15,198,210,0, //shufps $0x0,%xmm2,%xmm2
11970 65,15,89,208, //mulps %xmm8,%xmm2
11971 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
11972 72,173, //lods %ds:(%rsi),%rax
11973 255,224, //jmpq *%rax
11974};
11975
11976CODE const uint8_t sk_store_565_sse2[] = {
11977 72,173, //lods %ds:(%rsi),%rax
11978 72,139,0, //mov (%rax),%rax
11979 243,68,15,16,130,128,0,0,0, //movss 0x80(%rdx),%xmm8
11980 243,68,15,16,138,132,0,0,0, //movss 0x84(%rdx),%xmm9
11981 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
11982 69,15,40,208, //movaps %xmm8,%xmm10
11983 68,15,89,208, //mulps %xmm0,%xmm10
11984 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
11985 102,65,15,114,242,11, //pslld $0xb,%xmm10
11986 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
11987 68,15,89,201, //mulps %xmm1,%xmm9
11988 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
11989 102,65,15,114,241,5, //pslld $0x5,%xmm9
11990 102,69,15,235,202, //por %xmm10,%xmm9
11991 68,15,89,194, //mulps %xmm2,%xmm8
11992 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
11993 102,69,15,86,193, //orpd %xmm9,%xmm8
11994 102,65,15,114,240,16, //pslld $0x10,%xmm8
11995 102,65,15,114,224,16, //psrad $0x10,%xmm8
11996 102,69,15,107,192, //packssdw %xmm8,%xmm8
11997 102,68,15,214,4,120, //movq %xmm8,(%rax,%rdi,2)
11998 72,173, //lods %ds:(%rsi),%rax
11999 255,224, //jmpq *%rax
12000};
12001
12002CODE const uint8_t sk_load_8888_sse2[] = {
12003 72,173, //lods %ds:(%rsi),%rax
12004 72,139,0, //mov (%rax),%rax
12005 243,15,111,28,184, //movdqu (%rax,%rdi,4),%xmm3
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012006 184,255,0,0,0, //mov $0xff,%eax
12007 102,15,110,192, //movd %eax,%xmm0
Mike Klein894d5612017-03-07 07:59:52 -050012008 102,15,112,192,0, //pshufd $0x0,%xmm0,%xmm0
12009 102,15,111,203, //movdqa %xmm3,%xmm1
12010 102,15,114,209,8, //psrld $0x8,%xmm1
12011 102,15,219,200, //pand %xmm0,%xmm1
12012 102,15,111,211, //movdqa %xmm3,%xmm2
12013 102,15,114,210,16, //psrld $0x10,%xmm2
12014 102,15,219,208, //pand %xmm0,%xmm2
12015 102,15,219,195, //pand %xmm3,%xmm0
12016 15,91,192, //cvtdq2ps %xmm0,%xmm0
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012017 184,129,128,128,59, //mov $0x3b808081,%eax
12018 102,68,15,110,192, //movd %eax,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012019 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12020 65,15,89,192, //mulps %xmm8,%xmm0
12021 15,91,201, //cvtdq2ps %xmm1,%xmm1
12022 65,15,89,200, //mulps %xmm8,%xmm1
12023 15,91,210, //cvtdq2ps %xmm2,%xmm2
12024 65,15,89,208, //mulps %xmm8,%xmm2
12025 102,15,114,211,24, //psrld $0x18,%xmm3
12026 15,91,219, //cvtdq2ps %xmm3,%xmm3
12027 65,15,89,216, //mulps %xmm8,%xmm3
12028 72,173, //lods %ds:(%rsi),%rax
12029 255,224, //jmpq *%rax
12030};
12031
12032CODE const uint8_t sk_store_8888_sse2[] = {
12033 72,173, //lods %ds:(%rsi),%rax
12034 72,139,0, //mov (%rax),%rax
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012035 185,0,0,127,67, //mov $0x437f0000,%ecx
12036 102,68,15,110,193, //movd %ecx,%xmm8
Mike Klein894d5612017-03-07 07:59:52 -050012037 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12038 69,15,40,200, //movaps %xmm8,%xmm9
12039 68,15,89,200, //mulps %xmm0,%xmm9
12040 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
12041 69,15,40,208, //movaps %xmm8,%xmm10
12042 68,15,89,209, //mulps %xmm1,%xmm10
12043 102,69,15,91,210, //cvtps2dq %xmm10,%xmm10
12044 102,65,15,114,242,8, //pslld $0x8,%xmm10
12045 102,69,15,235,209, //por %xmm9,%xmm10
12046 69,15,40,200, //movaps %xmm8,%xmm9
12047 68,15,89,202, //mulps %xmm2,%xmm9
12048 102,69,15,91,201, //cvtps2dq %xmm9,%xmm9
12049 102,65,15,114,241,16, //pslld $0x10,%xmm9
12050 68,15,89,195, //mulps %xmm3,%xmm8
12051 102,69,15,91,192, //cvtps2dq %xmm8,%xmm8
12052 102,65,15,114,240,24, //pslld $0x18,%xmm8
12053 102,69,15,235,193, //por %xmm9,%xmm8
12054 102,69,15,235,194, //por %xmm10,%xmm8
12055 243,68,15,127,4,184, //movdqu %xmm8,(%rax,%rdi,4)
12056 72,173, //lods %ds:(%rsi),%rax
12057 255,224, //jmpq *%rax
12058};
12059
12060CODE const uint8_t sk_load_f16_sse2[] = {
12061 72,173, //lods %ds:(%rsi),%rax
12062 72,139,0, //mov (%rax),%rax
12063 243,15,111,4,248, //movdqu (%rax,%rdi,8),%xmm0
12064 243,15,111,76,248,16, //movdqu 0x10(%rax,%rdi,8),%xmm1
12065 102,15,111,208, //movdqa %xmm0,%xmm2
12066 102,15,97,209, //punpcklwd %xmm1,%xmm2
12067 102,15,105,193, //punpckhwd %xmm1,%xmm0
12068 102,68,15,111,194, //movdqa %xmm2,%xmm8
12069 102,68,15,97,192, //punpcklwd %xmm0,%xmm8
12070 102,15,105,208, //punpckhwd %xmm0,%xmm2
12071 102,15,110,66,100, //movd 0x64(%rdx),%xmm0
12072 102,15,112,216,0, //pshufd $0x0,%xmm0,%xmm3
12073 102,15,111,203, //movdqa %xmm3,%xmm1
12074 102,65,15,101,200, //pcmpgtw %xmm8,%xmm1
12075 102,65,15,223,200, //pandn %xmm8,%xmm1
12076 102,15,101,218, //pcmpgtw %xmm2,%xmm3
12077 102,15,223,218, //pandn %xmm2,%xmm3
12078 102,69,15,239,192, //pxor %xmm8,%xmm8
12079 102,15,111,193, //movdqa %xmm1,%xmm0
12080 102,65,15,97,192, //punpcklwd %xmm8,%xmm0
12081 102,15,114,240,13, //pslld $0xd,%xmm0
12082 102,15,110,82,92, //movd 0x5c(%rdx),%xmm2
12083 102,68,15,112,202,0, //pshufd $0x0,%xmm2,%xmm9
12084 65,15,89,193, //mulps %xmm9,%xmm0
12085 102,65,15,105,200, //punpckhwd %xmm8,%xmm1
12086 102,15,114,241,13, //pslld $0xd,%xmm1
12087 65,15,89,201, //mulps %xmm9,%xmm1
12088 102,15,111,211, //movdqa %xmm3,%xmm2
12089 102,65,15,97,208, //punpcklwd %xmm8,%xmm2
12090 102,15,114,242,13, //pslld $0xd,%xmm2
12091 65,15,89,209, //mulps %xmm9,%xmm2
12092 102,65,15,105,216, //punpckhwd %xmm8,%xmm3
12093 102,15,114,243,13, //pslld $0xd,%xmm3
12094 65,15,89,217, //mulps %xmm9,%xmm3
12095 72,173, //lods %ds:(%rsi),%rax
12096 255,224, //jmpq *%rax
12097};
12098
12099CODE const uint8_t sk_store_f16_sse2[] = {
12100 72,173, //lods %ds:(%rsi),%rax
12101 72,139,0, //mov (%rax),%rax
12102 102,68,15,110,66,96, //movd 0x60(%rdx),%xmm8
12103 102,69,15,112,192,0, //pshufd $0x0,%xmm8,%xmm8
12104 102,69,15,111,200, //movdqa %xmm8,%xmm9
12105 68,15,89,200, //mulps %xmm0,%xmm9
12106 102,65,15,114,209,13, //psrld $0xd,%xmm9
12107 102,69,15,111,208, //movdqa %xmm8,%xmm10
12108 68,15,89,209, //mulps %xmm1,%xmm10
12109 102,65,15,114,210,13, //psrld $0xd,%xmm10
12110 102,69,15,111,216, //movdqa %xmm8,%xmm11
12111 68,15,89,218, //mulps %xmm2,%xmm11
12112 102,65,15,114,211,13, //psrld $0xd,%xmm11
12113 68,15,89,195, //mulps %xmm3,%xmm8
12114 102,65,15,114,208,13, //psrld $0xd,%xmm8
12115 102,65,15,115,250,2, //pslldq $0x2,%xmm10
12116 102,69,15,235,209, //por %xmm9,%xmm10
12117 102,65,15,115,248,2, //pslldq $0x2,%xmm8
12118 102,69,15,235,195, //por %xmm11,%xmm8
12119 102,69,15,111,202, //movdqa %xmm10,%xmm9
12120 102,69,15,98,200, //punpckldq %xmm8,%xmm9
12121 243,68,15,127,12,248, //movdqu %xmm9,(%rax,%rdi,8)
12122 102,69,15,106,208, //punpckhdq %xmm8,%xmm10
12123 243,68,15,127,84,248,16, //movdqu %xmm10,0x10(%rax,%rdi,8)
12124 72,173, //lods %ds:(%rsi),%rax
12125 255,224, //jmpq *%rax
12126};
12127
12128CODE const uint8_t sk_store_f32_sse2[] = {
12129 72,173, //lods %ds:(%rsi),%rax
12130 72,139,0, //mov (%rax),%rax
12131 72,137,249, //mov %rdi,%rcx
12132 72,193,225,4, //shl $0x4,%rcx
12133 68,15,40,192, //movaps %xmm0,%xmm8
12134 68,15,40,200, //movaps %xmm0,%xmm9
12135 68,15,20,201, //unpcklps %xmm1,%xmm9
12136 68,15,40,210, //movaps %xmm2,%xmm10
12137 68,15,40,218, //movaps %xmm2,%xmm11
12138 68,15,20,219, //unpcklps %xmm3,%xmm11
12139 68,15,21,193, //unpckhps %xmm1,%xmm8
12140 68,15,21,211, //unpckhps %xmm3,%xmm10
12141 69,15,40,225, //movaps %xmm9,%xmm12
12142 102,69,15,20,227, //unpcklpd %xmm11,%xmm12
12143 102,69,15,21,203, //unpckhpd %xmm11,%xmm9
12144 69,15,40,216, //movaps %xmm8,%xmm11
12145 102,69,15,20,218, //unpcklpd %xmm10,%xmm11
12146 102,69,15,21,194, //unpckhpd %xmm10,%xmm8
12147 102,68,15,17,36,8, //movupd %xmm12,(%rax,%rcx,1)
12148 102,68,15,17,76,8,16, //movupd %xmm9,0x10(%rax,%rcx,1)
12149 102,68,15,17,92,8,32, //movupd %xmm11,0x20(%rax,%rcx,1)
12150 102,68,15,17,68,8,48, //movupd %xmm8,0x30(%rax,%rcx,1)
12151 72,173, //lods %ds:(%rsi),%rax
12152 255,224, //jmpq *%rax
12153};
12154
12155CODE const uint8_t sk_clamp_x_sse2[] = {
12156 72,173, //lods %ds:(%rsi),%rax
12157 69,15,87,192, //xorps %xmm8,%xmm8
12158 68,15,95,192, //maxps %xmm0,%xmm8
12159 243,68,15,16,8, //movss (%rax),%xmm9
12160 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12161 102,15,118,192, //pcmpeqd %xmm0,%xmm0
12162 102,65,15,254,193, //paddd %xmm9,%xmm0
12163 68,15,93,192, //minps %xmm0,%xmm8
12164 72,173, //lods %ds:(%rsi),%rax
12165 65,15,40,192, //movaps %xmm8,%xmm0
12166 255,224, //jmpq *%rax
12167};
12168
12169CODE const uint8_t sk_clamp_y_sse2[] = {
12170 72,173, //lods %ds:(%rsi),%rax
12171 69,15,87,192, //xorps %xmm8,%xmm8
12172 68,15,95,193, //maxps %xmm1,%xmm8
12173 243,68,15,16,8, //movss (%rax),%xmm9
12174 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12175 102,15,118,201, //pcmpeqd %xmm1,%xmm1
12176 102,65,15,254,201, //paddd %xmm9,%xmm1
12177 68,15,93,193, //minps %xmm1,%xmm8
12178 72,173, //lods %ds:(%rsi),%rax
12179 65,15,40,200, //movaps %xmm8,%xmm1
12180 255,224, //jmpq *%rax
12181};
12182
12183CODE const uint8_t sk_repeat_x_sse2[] = {
12184 72,173, //lods %ds:(%rsi),%rax
12185 243,68,15,16,0, //movss (%rax),%xmm8
12186 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12187 68,15,40,200, //movaps %xmm0,%xmm9
12188 69,15,94,200, //divps %xmm8,%xmm9
12189 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
12190 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
12191 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012192 184,0,0,128,63, //mov $0x3f800000,%eax
12193 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050012194 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12195 69,15,84,217, //andps %xmm9,%xmm11
12196 69,15,92,211, //subps %xmm11,%xmm10
12197 69,15,89,208, //mulps %xmm8,%xmm10
12198 65,15,92,194, //subps %xmm10,%xmm0
12199 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12200 102,69,15,254,200, //paddd %xmm8,%xmm9
12201 65,15,93,193, //minps %xmm9,%xmm0
12202 72,173, //lods %ds:(%rsi),%rax
12203 255,224, //jmpq *%rax
12204};
12205
12206CODE const uint8_t sk_repeat_y_sse2[] = {
12207 72,173, //lods %ds:(%rsi),%rax
12208 243,68,15,16,0, //movss (%rax),%xmm8
12209 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12210 68,15,40,201, //movaps %xmm1,%xmm9
12211 69,15,94,200, //divps %xmm8,%xmm9
12212 243,69,15,91,209, //cvttps2dq %xmm9,%xmm10
12213 69,15,91,210, //cvtdq2ps %xmm10,%xmm10
12214 69,15,194,202,1, //cmpltps %xmm10,%xmm9
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012215 184,0,0,128,63, //mov $0x3f800000,%eax
12216 102,68,15,110,216, //movd %eax,%xmm11
Mike Klein894d5612017-03-07 07:59:52 -050012217 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12218 69,15,84,217, //andps %xmm9,%xmm11
12219 69,15,92,211, //subps %xmm11,%xmm10
12220 69,15,89,208, //mulps %xmm8,%xmm10
12221 65,15,92,202, //subps %xmm10,%xmm1
12222 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12223 102,69,15,254,200, //paddd %xmm8,%xmm9
12224 65,15,93,201, //minps %xmm9,%xmm1
12225 72,173, //lods %ds:(%rsi),%rax
12226 255,224, //jmpq *%rax
12227};
12228
12229CODE const uint8_t sk_mirror_x_sse2[] = {
12230 72,173, //lods %ds:(%rsi),%rax
12231 243,68,15,16,8, //movss (%rax),%xmm9
12232 69,15,40,193, //movaps %xmm9,%xmm8
12233 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12234 65,15,92,192, //subps %xmm8,%xmm0
12235 243,69,15,88,201, //addss %xmm9,%xmm9
12236 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12237 68,15,40,208, //movaps %xmm0,%xmm10
12238 69,15,94,209, //divps %xmm9,%xmm10
12239 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
12240 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
12241 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012242 184,0,0,128,63, //mov $0x3f800000,%eax
12243 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050012244 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12245 69,15,84,226, //andps %xmm10,%xmm12
12246 69,15,87,210, //xorps %xmm10,%xmm10
12247 69,15,92,220, //subps %xmm12,%xmm11
12248 69,15,89,217, //mulps %xmm9,%xmm11
12249 65,15,92,195, //subps %xmm11,%xmm0
12250 65,15,92,192, //subps %xmm8,%xmm0
12251 68,15,92,208, //subps %xmm0,%xmm10
12252 65,15,84,194, //andps %xmm10,%xmm0
12253 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12254 102,69,15,254,200, //paddd %xmm8,%xmm9
12255 65,15,93,193, //minps %xmm9,%xmm0
12256 72,173, //lods %ds:(%rsi),%rax
12257 255,224, //jmpq *%rax
12258};
12259
12260CODE const uint8_t sk_mirror_y_sse2[] = {
12261 72,173, //lods %ds:(%rsi),%rax
12262 243,68,15,16,8, //movss (%rax),%xmm9
12263 69,15,40,193, //movaps %xmm9,%xmm8
12264 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12265 65,15,92,200, //subps %xmm8,%xmm1
12266 243,69,15,88,201, //addss %xmm9,%xmm9
12267 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12268 68,15,40,209, //movaps %xmm1,%xmm10
12269 69,15,94,209, //divps %xmm9,%xmm10
12270 243,69,15,91,218, //cvttps2dq %xmm10,%xmm11
12271 69,15,91,219, //cvtdq2ps %xmm11,%xmm11
12272 69,15,194,211,1, //cmpltps %xmm11,%xmm10
Mike Kleinfdf3bbe2017-03-07 14:41:06 -050012273 184,0,0,128,63, //mov $0x3f800000,%eax
12274 102,68,15,110,224, //movd %eax,%xmm12
Mike Klein894d5612017-03-07 07:59:52 -050012275 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12276 69,15,84,226, //andps %xmm10,%xmm12
12277 69,15,87,210, //xorps %xmm10,%xmm10
12278 69,15,92,220, //subps %xmm12,%xmm11
12279 69,15,89,217, //mulps %xmm9,%xmm11
12280 65,15,92,203, //subps %xmm11,%xmm1
12281 65,15,92,200, //subps %xmm8,%xmm1
12282 68,15,92,209, //subps %xmm1,%xmm10
12283 65,15,84,202, //andps %xmm10,%xmm1
12284 102,69,15,118,201, //pcmpeqd %xmm9,%xmm9
12285 102,69,15,254,200, //paddd %xmm8,%xmm9
12286 65,15,93,201, //minps %xmm9,%xmm1
12287 72,173, //lods %ds:(%rsi),%rax
12288 255,224, //jmpq *%rax
12289};
12290
Mike Kleine9ed07d2017-03-07 12:28:11 -050012291CODE const uint8_t sk_luminance_to_alpha_sse2[] = {
12292 243,15,16,154,136,0,0,0, //movss 0x88(%rdx),%xmm3
12293 243,68,15,16,130,140,0,0,0, //movss 0x8c(%rdx),%xmm8
12294 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12295 15,89,216, //mulps %xmm0,%xmm3
12296 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12297 68,15,89,193, //mulps %xmm1,%xmm8
12298 68,15,88,195, //addps %xmm3,%xmm8
12299 243,15,16,154,144,0,0,0, //movss 0x90(%rdx),%xmm3
12300 15,198,219,0, //shufps $0x0,%xmm3,%xmm3
12301 15,89,218, //mulps %xmm2,%xmm3
12302 65,15,88,216, //addps %xmm8,%xmm3
12303 72,173, //lods %ds:(%rsi),%rax
12304 15,87,192, //xorps %xmm0,%xmm0
12305 15,87,201, //xorps %xmm1,%xmm1
12306 15,87,210, //xorps %xmm2,%xmm2
12307 255,224, //jmpq *%rax
12308};
12309
Mike Klein894d5612017-03-07 07:59:52 -050012310CODE const uint8_t sk_matrix_2x3_sse2[] = {
12311 68,15,40,201, //movaps %xmm1,%xmm9
12312 68,15,40,192, //movaps %xmm0,%xmm8
12313 72,173, //lods %ds:(%rsi),%rax
12314 243,15,16,0, //movss (%rax),%xmm0
12315 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12316 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12317 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12318 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12319 243,68,15,16,88,16, //movss 0x10(%rax),%xmm11
12320 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12321 69,15,89,209, //mulps %xmm9,%xmm10
12322 69,15,88,211, //addps %xmm11,%xmm10
12323 65,15,89,192, //mulps %xmm8,%xmm0
12324 65,15,88,194, //addps %xmm10,%xmm0
12325 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12326 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
12327 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12328 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
12329 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12330 69,15,89,209, //mulps %xmm9,%xmm10
12331 69,15,88,211, //addps %xmm11,%xmm10
12332 65,15,89,200, //mulps %xmm8,%xmm1
12333 65,15,88,202, //addps %xmm10,%xmm1
12334 72,173, //lods %ds:(%rsi),%rax
12335 255,224, //jmpq *%rax
12336};
12337
12338CODE const uint8_t sk_matrix_3x4_sse2[] = {
12339 68,15,40,201, //movaps %xmm1,%xmm9
12340 68,15,40,192, //movaps %xmm0,%xmm8
12341 72,173, //lods %ds:(%rsi),%rax
12342 243,15,16,0, //movss (%rax),%xmm0
12343 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12344 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12345 243,68,15,16,80,12, //movss 0xc(%rax),%xmm10
12346 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12347 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
12348 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12349 243,68,15,16,96,36, //movss 0x24(%rax),%xmm12
12350 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12351 68,15,89,218, //mulps %xmm2,%xmm11
12352 69,15,88,220, //addps %xmm12,%xmm11
12353 69,15,89,209, //mulps %xmm9,%xmm10
12354 69,15,88,211, //addps %xmm11,%xmm10
12355 65,15,89,192, //mulps %xmm8,%xmm0
12356 65,15,88,194, //addps %xmm10,%xmm0
12357 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12358 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
12359 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12360 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
12361 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12362 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
12363 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12364 68,15,89,218, //mulps %xmm2,%xmm11
12365 69,15,88,220, //addps %xmm12,%xmm11
12366 69,15,89,209, //mulps %xmm9,%xmm10
12367 69,15,88,211, //addps %xmm11,%xmm10
12368 65,15,89,200, //mulps %xmm8,%xmm1
12369 65,15,88,202, //addps %xmm10,%xmm1
12370 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12371 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12372 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
12373 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12374 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
12375 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12376 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
12377 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12378 68,15,89,226, //mulps %xmm2,%xmm12
12379 69,15,88,229, //addps %xmm13,%xmm12
12380 69,15,89,217, //mulps %xmm9,%xmm11
12381 69,15,88,220, //addps %xmm12,%xmm11
12382 69,15,89,208, //mulps %xmm8,%xmm10
12383 69,15,88,211, //addps %xmm11,%xmm10
12384 72,173, //lods %ds:(%rsi),%rax
12385 65,15,40,210, //movaps %xmm10,%xmm2
12386 255,224, //jmpq *%rax
12387};
12388
// sk_matrix_4x5_sse2: raw x86-64 SSE2 machine code for one pipeline stage,
// emitted as bytes into an executable section via CODE. Generated output
// (see the build_stages.py note at the top of the file); regenerate rather
// than hand-editing the bytes.
//
// Notation used below: x0..x3 are the values of %xmm0..%xmm3 on entry, and
// m[k] is the 32-bit float at byte offset 4*k from the pointer loaded into
// %rax. Every coefficient is loaded with movss and broadcast to all four
// lanes with shufps $0x0. The stage computes, per lane:
//   xmm0 = m[0]*x0 + m[4]*x1 + m[8] *x2 + m[12]*x3 + m[16]
//   xmm1 = m[1]*x0 + m[5]*x1 + m[9] *x2 + m[13]*x3 + m[17]
//   xmm2 = m[2]*x0 + m[6]*x1 + m[10]*x2 + m[14]*x3 + m[18]
//   xmm3 = m[3]*x0 + m[7]*x1 + m[11]*x2 + m[15]*x3 + m[19]
// i.e. a 4x5 matrix stored column-by-column applied to (x0,x1,x2,x3,1).
// NOTE(review): xmm0..xmm3 are presumably the r,g,b,a channels - confirm
// against the stage's C++ source.
// Scratch registers written: %rax, %xmm8..%xmm15.
Mike Kleine9ed07d2017-03-07 12:28:11 -050012389CODE const uint8_t sk_matrix_4x5_sse2[] = {
  // Save x1 and x0, since %xmm0/%xmm1 are reused for the first two results.
12390 68,15,40,201, //movaps %xmm1,%xmm9
12391 68,15,40,192, //movaps %xmm0,%xmm8
  // Fetch this stage's argument (the matrix pointer) from (%rsi) into %rax.
12392 72,173, //lods %ds:(%rsi),%rax
  // --- Output 0 (ends up in %xmm0): coefficients m[0], m[4], m[8], m[12], m[16].
  // m[1] is also loaded here into %xmm1 but only broadcast later, for output 1.
12393 243,15,16,0, //movss (%rax),%xmm0
12394 243,15,16,72,4, //movss 0x4(%rax),%xmm1
12395 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12396 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
12397 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12398 243,68,15,16,88,32, //movss 0x20(%rax),%xmm11
12399 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12400 243,68,15,16,96,48, //movss 0x30(%rax),%xmm12
12401 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12402 243,68,15,16,104,64, //movss 0x40(%rax),%xmm13
12403 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
  // Accumulate from the innermost term outwards: ((m12*x3 + m16) + m8*x2) + ...
12404 68,15,89,227, //mulps %xmm3,%xmm12
12405 69,15,88,229, //addps %xmm13,%xmm12
12406 68,15,89,218, //mulps %xmm2,%xmm11
12407 69,15,88,220, //addps %xmm12,%xmm11
12408 69,15,89,209, //mulps %xmm9,%xmm10
12409 69,15,88,211, //addps %xmm11,%xmm10
12410 65,15,89,192, //mulps %xmm8,%xmm0
12411 65,15,88,194, //addps %xmm10,%xmm0
  // --- Output 1 (ends up in %xmm1): coefficients m[1], m[5], m[9], m[13], m[17].
12412 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12413 243,68,15,16,80,20, //movss 0x14(%rax),%xmm10
12414 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12415 243,68,15,16,88,36, //movss 0x24(%rax),%xmm11
12416 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12417 243,68,15,16,96,52, //movss 0x34(%rax),%xmm12
12418 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12419 243,68,15,16,104,68, //movss 0x44(%rax),%xmm13
12420 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12421 68,15,89,227, //mulps %xmm3,%xmm12
12422 69,15,88,229, //addps %xmm13,%xmm12
12423 68,15,89,218, //mulps %xmm2,%xmm11
12424 69,15,88,220, //addps %xmm12,%xmm11
12425 69,15,89,209, //mulps %xmm9,%xmm10
12426 69,15,88,211, //addps %xmm11,%xmm10
12427 65,15,89,200, //mulps %xmm8,%xmm1
12428 65,15,88,202, //addps %xmm10,%xmm1
  // --- Output 2: coefficients m[2], m[6], m[10], m[14], m[18].
  // Built in %xmm10 because %xmm2 still holds x2, which output 3 needs.
12429 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12430 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12431 243,68,15,16,88,24, //movss 0x18(%rax),%xmm11
12432 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12433 243,68,15,16,96,40, //movss 0x28(%rax),%xmm12
12434 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12435 243,68,15,16,104,56, //movss 0x38(%rax),%xmm13
12436 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12437 243,68,15,16,112,72, //movss 0x48(%rax),%xmm14
12438 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
12439 68,15,89,235, //mulps %xmm3,%xmm13
12440 69,15,88,238, //addps %xmm14,%xmm13
12441 68,15,89,226, //mulps %xmm2,%xmm12
12442 69,15,88,229, //addps %xmm13,%xmm12
12443 69,15,89,217, //mulps %xmm9,%xmm11
12444 69,15,88,220, //addps %xmm12,%xmm11
12445 69,15,89,208, //mulps %xmm8,%xmm10
12446 69,15,88,211, //addps %xmm11,%xmm10
  // --- Output 3: coefficients m[3], m[7], m[11], m[15], m[19].
  // Built in %xmm11; %xmm2 and %xmm3 are still the original x2 and x3 here.
12447 243,68,15,16,88,12, //movss 0xc(%rax),%xmm11
12448 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12449 243,68,15,16,96,28, //movss 0x1c(%rax),%xmm12
12450 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12451 243,68,15,16,104,44, //movss 0x2c(%rax),%xmm13
12452 69,15,198,237,0, //shufps $0x0,%xmm13,%xmm13
12453 243,68,15,16,112,60, //movss 0x3c(%rax),%xmm14
12454 69,15,198,246,0, //shufps $0x0,%xmm14,%xmm14
12455 243,68,15,16,120,76, //movss 0x4c(%rax),%xmm15
12456 69,15,198,255,0, //shufps $0x0,%xmm15,%xmm15
12457 68,15,89,243, //mulps %xmm3,%xmm14
12458 69,15,88,247, //addps %xmm15,%xmm14
12459 68,15,89,234, //mulps %xmm2,%xmm13
12460 69,15,88,238, //addps %xmm14,%xmm13
12461 69,15,89,225, //mulps %xmm9,%xmm12
12462 69,15,88,229, //addps %xmm13,%xmm12
12463 69,15,89,216, //mulps %xmm8,%xmm11
12464 69,15,88,220, //addps %xmm12,%xmm11
  // Fetch the next 8-byte word from (%rsi) into %rax, move outputs 2 and 3
  // into %xmm2/%xmm3, then jump to the fetched address (no return here).
12465 72,173, //lods %ds:(%rsi),%rax
12466 65,15,40,210, //movaps %xmm10,%xmm2
12467 65,15,40,219, //movaps %xmm11,%xmm3
12468 255,224, //jmpq *%rax
12469};
12470
// sk_matrix_perspective_sse2: raw x86-64 SSE2 machine code for one pipeline
// stage, emitted as bytes into an executable section via CODE. Generated
// output (see the build_stages.py note at the top of the file); regenerate
// rather than hand-editing the bytes.
//
// Notation used below: x0, x1 are the values of %xmm0, %xmm1 on entry, and
// m[k] is the 32-bit float at byte offset 4*k from the pointer loaded into
// %rax. Each coefficient is loaded with movss and broadcast to all four lanes
// with shufps $0x0. Per lane the stage computes:
//   X = m[0]*x0 + m[1]*x1 + m[2]
//   Y = m[3]*x0 + m[4]*x1 + m[5]
//   Z = m[6]*x0 + m[7]*x1 + m[8]
//   xmm0 = X * rcp(Z),  xmm1 = Y * rcp(Z)
// where rcp is the rcpps instruction (an approximate reciprocal, not a divide).
// %xmm2 and %xmm3 are not touched. Scratch registers written: %rax,
// %xmm8..%xmm12.
// NOTE(review): x0/x1 are presumably the x,y coordinates being mapped through
// a 3x3 matrix - confirm against the stage's C++ source.
Mike Klein894d5612017-03-07 07:59:52 -050012471CODE const uint8_t sk_matrix_perspective_sse2[] = {
  // Save x0 (%xmm0 is reused for X) and fetch the matrix pointer into %rax.
12472 68,15,40,192, //movaps %xmm0,%xmm8
12473 72,173, //lods %ds:(%rsi),%rax
  // --- X = m[0]*x0 + m[1]*x1 + m[2], accumulated into %xmm0.
12474 243,15,16,0, //movss (%rax),%xmm0
12475 243,68,15,16,72,4, //movss 0x4(%rax),%xmm9
12476 15,198,192,0, //shufps $0x0,%xmm0,%xmm0
12477 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12478 243,68,15,16,80,8, //movss 0x8(%rax),%xmm10
12479 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12480 68,15,89,201, //mulps %xmm1,%xmm9
12481 69,15,88,202, //addps %xmm10,%xmm9
12482 65,15,89,192, //mulps %xmm8,%xmm0
12483 65,15,88,193, //addps %xmm9,%xmm0
  // --- Y = m[3]*x0 + m[4]*x1 + m[5], accumulated into %xmm9.
12484 243,68,15,16,72,12, //movss 0xc(%rax),%xmm9
12485 69,15,198,201,0, //shufps $0x0,%xmm9,%xmm9
12486 243,68,15,16,80,16, //movss 0x10(%rax),%xmm10
12487 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12488 243,68,15,16,88,20, //movss 0x14(%rax),%xmm11
12489 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12490 68,15,89,209, //mulps %xmm1,%xmm10
12491 69,15,88,211, //addps %xmm11,%xmm10
12492 69,15,89,200, //mulps %xmm8,%xmm9
12493 69,15,88,202, //addps %xmm10,%xmm9
  // --- Z = m[6]*x0 + m[7]*x1 + m[8], accumulated into %xmm10.
12494 243,68,15,16,80,24, //movss 0x18(%rax),%xmm10
12495 69,15,198,210,0, //shufps $0x0,%xmm10,%xmm10
12496 243,68,15,16,88,28, //movss 0x1c(%rax),%xmm11
12497 69,15,198,219,0, //shufps $0x0,%xmm11,%xmm11
12498 243,68,15,16,96,32, //movss 0x20(%rax),%xmm12
12499 69,15,198,228,0, //shufps $0x0,%xmm12,%xmm12
12500 68,15,89,217, //mulps %xmm1,%xmm11
12501 69,15,88,220, //addps %xmm12,%xmm11
12502 69,15,89,208, //mulps %xmm8,%xmm10
12503 69,15,88,211, //addps %xmm11,%xmm10
  // x1 is no longer needed, so %xmm1 temporarily holds rcp(Z); scale X and Y by it.
12504 65,15,83,202, //rcpps %xmm10,%xmm1
12505 15,89,193, //mulps %xmm1,%xmm0
12506 68,15,89,201, //mulps %xmm1,%xmm9
  // Fetch the next 8-byte word from (%rsi) into %rax, move the scaled Y into
  // %xmm1, then jump to the fetched address (no return here).
12507 72,173, //lods %ds:(%rsi),%rax
12508 65,15,40,201, //movaps %xmm9,%xmm1
12509 255,224, //jmpq *%rax
12510};
12511
// sk_linear_gradient_2stops_sse2: raw x86-64 SSE2 machine code for one
// pipeline stage, emitted as bytes into an executable section via CODE.
// Generated output (see the build_stages.py note at the top of the file);
// regenerate rather than hand-editing the bytes.
//
// Notation used below: t is the value of %xmm0 on entry; c0 is the 4-float
// vector at offset 0x00 and c1 the 4-float vector at offset 0x10 from the
// pointer loaded into %rax (both read with unaligned movups). Lane i of each
// vector is broadcast with shufps (imm 0x00, 0x55, 0xaa, 0xff select lanes
// 0..3), and the stage computes, per lane of t:
//   xmm0 = c1[0]*t + c0[0]
//   xmm1 = c1[1]*t + c0[1]
//   xmm2 = c1[2]*t + c0[2]
//   xmm3 = c1[3]*t + c0[3]
// The incoming %xmm1..%xmm3 are overwritten without being read.
// Scratch registers written: %rax, %xmm8..%xmm10.
// NOTE(review): c0/c1 presumably hold the gradient's start color and the
// per-channel color delta - confirm against the stage's C++ source.
12512CODE const uint8_t sk_linear_gradient_2stops_sse2[] = {
  // Fetch the stage argument pointer and load both 4-float vectors.
12513 72,173, //lods %ds:(%rsi),%rax
12514 68,15,16,8, //movups (%rax),%xmm9
12515 15,16,88,16, //movups 0x10(%rax),%xmm3
  // --- Lane 0: built in %xmm8 because %xmm0 (t) is still needed below.
12516 68,15,40,195, //movaps %xmm3,%xmm8
12517 69,15,198,192,0, //shufps $0x0,%xmm8,%xmm8
12518 65,15,40,201, //movaps %xmm9,%xmm1
12519 15,198,201,0, //shufps $0x0,%xmm1,%xmm1
12520 68,15,89,192, //mulps %xmm0,%xmm8
12521 68,15,88,193, //addps %xmm1,%xmm8
  // --- Lane 1 -> %xmm1 (%xmm2 is used as the temporary for c0[1]).
12522 15,40,203, //movaps %xmm3,%xmm1
12523 15,198,201,85, //shufps $0x55,%xmm1,%xmm1
12524 65,15,40,209, //movaps %xmm9,%xmm2
12525 15,198,210,85, //shufps $0x55,%xmm2,%xmm2
12526 15,89,200, //mulps %xmm0,%xmm1
12527 15,88,202, //addps %xmm2,%xmm1
  // --- Lane 2 -> %xmm2.
12528 15,40,211, //movaps %xmm3,%xmm2
12529 15,198,210,170, //shufps $0xaa,%xmm2,%xmm2
12530 69,15,40,209, //movaps %xmm9,%xmm10
12531 69,15,198,210,170, //shufps $0xaa,%xmm10,%xmm10
12532 15,89,208, //mulps %xmm0,%xmm2
12533 65,15,88,210, //addps %xmm10,%xmm2
  // --- Lane 3 -> %xmm3. c1 and c0 are broadcast in place, as no later lane needs them.
12534 15,198,219,255, //shufps $0xff,%xmm3,%xmm3
12535 69,15,198,201,255, //shufps $0xff,%xmm9,%xmm9
12536 15,89,216, //mulps %xmm0,%xmm3
12537 65,15,88,217, //addps %xmm9,%xmm3
  // Fetch the next 8-byte word from (%rsi) into %rax, move the lane-0 result
  // into %xmm0 (t is dead now), then jump to the fetched address.
12538 72,173, //lods %ds:(%rsi),%rax
12539 65,15,40,192, //movaps %xmm8,%xmm0
12540 255,224, //jmpq *%rax
12541};
12542#endif