blob: ca199c9fc6cab531e75699025e3519e38c2a1b08 [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein5cdeb392020-02-10 12:10:36 -0600214 values (originally 16):
3 v0 = splat 3B808081 (0.0039215689)
4 v1 = load8 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -06005 v2 = to_f32 v1
Mike Klein5cdeb392020-02-10 12:10:36 -06006 v3 = mul_f32 v0 v2
Mike Klein57bdb242020-01-08 15:25:07 -06007 v4 = load8 arg(1)
8 v5 = to_f32 v4
Mike Klein5cdeb392020-02-10 12:10:36 -06009 v6 = mul_f32 v0 v5
Mike Klein57bdb242020-01-08 15:25:07 -060010 v7 = splat 3F800000 (1)
11 v8 = sub_f32 v7 v3
12 v9 = mad_f32 v6 v8 v3
Mike Klein5cdeb392020-02-10 12:10:36 -060013 v10 = splat 437F0000 (255)
14 v11 = mul_f32 v9 v10
15 v12 = round v11
16 store8 arg(1) v12
Mike Klein8c1e0ef2019-11-12 09:07:23 -060017
Mike Klein5cdeb392020-02-10 12:10:36 -0600186 registers, 14 instructions:
190 r0 = splat 3B808081 (0.0039215689)
201 r1 = splat 3F800000 (1)
212 r2 = splat 437F0000 (255)
Mike Klein8c1e0ef2019-11-12 09:07:23 -060022loop:
Mike Klein5cdeb392020-02-10 12:10:36 -0600233 r3 = load8 arg(0)
244 r3 = to_f32 r3
255 r3 = mul_f32 r0 r3
266 r4 = load8 arg(1)
277 r4 = to_f32 r4
288 r4 = mul_f32 r0 r4
299 r5 = sub_f32 r1 r3
3010 r3 = mad_f32 r4 r5 r3
3111 r3 = mul_f32 r3 r2
3212 r3 = round r3
3313 store8 arg(1) r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -060034
Mike Klein5cdeb392020-02-10 12:10:36 -060035A8 over G8
3620 values (originally 22):
37 v0 = splat 3B808081 (0.0039215689)
38 v1 = load8 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -060039 v2 = to_f32 v1
Mike Klein5cdeb392020-02-10 12:10:36 -060040 v3 = mul_f32 v0 v2
41 v4 = load8 arg(0)
42 v5 = to_f32 v4
43 v6 = mul_f32 v0 v5
44 v7 = splat 3F800000 (1)
45 v8 = sub_f32 v7 v6
46 v9 = mul_f32 v3 v8
47 v10 = splat 3E59B3D0 (0.21259999)
48 v11 = splat 3F371759 (0.71520001)
49 v12 = splat 3D93DD98 (0.0722)
50 v13 = mul_f32 v9 v12
51 v14 = mad_f32 v9 v11 v13
52 v15 = mad_f32 v9 v10 v14
53 v16 = splat 437F0000 (255)
54 v17 = mul_f32 v15 v16
55 v18 = round v17
56 store8 arg(1) v18
57
588 registers, 20 instructions:
590 r0 = splat 3B808081 (0.0039215689)
601 r1 = splat 3F800000 (1)
612 r2 = splat 3E59B3D0 (0.21259999)
623 r3 = splat 3F371759 (0.71520001)
634 r4 = splat 3D93DD98 (0.0722)
645 r5 = splat 437F0000 (255)
65loop:
666 r6 = load8 arg(1)
677 r6 = to_f32 r6
688 r6 = mul_f32 r0 r6
699 r7 = load8 arg(0)
7010 r7 = to_f32 r7
7111 r7 = mul_f32 r0 r7
7212 r7 = sub_f32 r1 r7
7313 r7 = mul_f32 r6 r7
7414 r6 = mul_f32 r7 r4
7515 r6 = mad_f32 r7 r3 r6
7616 r6 = mad_f32 r7 r2 r6
7717 r6 = mul_f32 r6 r5
7818 r6 = round r6
7919 store8 arg(1) r6
80
81A8 over RGBA_8888
8239 values (originally 40):
83 v0 = splat 3B808081 (0.0039215689)
84 v1 = load32 arg(1)
85 v2 = splat FF (3.5733111e-43)
86 v3 = bit_and v2 v1
87 v4 = to_f32 v3
88 v5 = mul_f32 v0 v4
89 v6 = load8 arg(0)
90 v7 = to_f32 v6
91 v8 = mul_f32 v0 v7
92 v9 = splat 3F800000 (1)
93 v10 = sub_f32 v9 v8
94 v11 = mul_f32 v5 v10
95 v12 = splat 437F0000 (255)
96 v13 = mul_f32 v11 v12
97 v14 = round v13
98 v15 = shr_i32 v1 8
99 v16 = bit_and v2 v15
100 v17 = to_f32 v16
101 v18 = mul_f32 v0 v17
102 v19 = mul_f32 v18 v10
103 v20 = mul_f32 v19 v12
104 v21 = round v20
105 v22 = pack v14 v21 8
106 v23 = shr_i32 v1 16
107 v24 = bit_and v2 v23
108 v25 = to_f32 v24
109 v26 = mul_f32 v0 v25
110 v27 = mul_f32 v26 v10
111 v28 = mul_f32 v27 v12
112 v29 = round v28
113 v30 = shr_i32 v1 24
114 v31 = to_f32 v30
115 v32 = mul_f32 v0 v31
116 v33 = mad_f32 v32 v10 v8
117 v34 = mul_f32 v33 v12
118 v35 = round v34
119 v36 = pack v29 v35 8
120 v37 = pack v22 v36 16
121 store32 arg(1) v37
122
1239 registers, 39 instructions:
1240 r0 = splat 3B808081 (0.0039215689)
1251 r1 = splat FF (3.5733111e-43)
1262 r2 = splat 3F800000 (1)
1273 r3 = splat 437F0000 (255)
128loop:
1294 r4 = load32 arg(1)
1305 r5 = bit_and r1 r4
1316 r5 = to_f32 r5
1327 r5 = mul_f32 r0 r5
1338 r6 = load8 arg(0)
1349 r6 = to_f32 r6
13510 r6 = mul_f32 r0 r6
13611 r7 = sub_f32 r2 r6
13712 r5 = mul_f32 r5 r7
13813 r5 = mul_f32 r5 r3
13914 r5 = round r5
14015 r8 = shr_i32 r4 8
14116 r8 = bit_and r1 r8
14217 r8 = to_f32 r8
14318 r8 = mul_f32 r0 r8
14419 r8 = mul_f32 r8 r7
14520 r8 = mul_f32 r8 r3
14621 r8 = round r8
14722 r8 = pack r5 r8 8
14823 r5 = shr_i32 r4 16
14924 r5 = bit_and r1 r5
15025 r5 = to_f32 r5
15126 r5 = mul_f32 r0 r5
15227 r5 = mul_f32 r5 r7
15328 r5 = mul_f32 r5 r3
15429 r5 = round r5
15530 r4 = shr_i32 r4 24
15631 r4 = to_f32 r4
15732 r4 = mul_f32 r0 r4
15833 r6 = mad_f32 r4 r7 r6
15934 r6 = mul_f32 r6 r3
16035 r6 = round r6
16136 r6 = pack r5 r6 8
16237 r6 = pack r8 r6 16
16338 store32 arg(1) r6
164
165G8 over A8
16611 values (originally 15):
167 v0 = splat 3F800000 (1)
168 v1 = splat 0 (0)
169 v2 = splat 3B808081 (0.0039215689)
170 v3 = load8 arg(1)
171 v4 = to_f32 v3
172 v5 = mul_f32 v2 v4
173 v6 = mad_f32 v5 v1 v0
174 v7 = splat 437F0000 (255)
175 v8 = mul_f32 v6 v7
176 v9 = round v8
177 store8 arg(1) v9
178
1795 registers, 11 instructions:
1800 r0 = splat 3F800000 (1)
1811 r1 = splat 0 (0)
1822 r2 = splat 3B808081 (0.0039215689)
1833 r3 = splat 437F0000 (255)
184loop:
1854 r4 = load8 arg(1)
1865 r4 = to_f32 r4
1876 r4 = mul_f32 r2 r4
1887 r4 = mad_f32 r4 r1 r0
1898 r4 = mul_f32 r4 r3
1909 r4 = round r4
19110 store8 arg(1) r4
192
193G8 over G8
19419 values (originally 20):
195 v0 = splat 3B808081 (0.0039215689)
196 v1 = load8 arg(0)
197 v2 = to_f32 v1
198 v3 = mul_f32 v0 v2
Mike Kleina6434a52020-01-08 14:06:52 -0600199 v4 = load8 arg(1)
200 v5 = to_f32 v4
Mike Klein5cdeb392020-02-10 12:10:36 -0600201 v6 = mul_f32 v0 v5
202 v7 = splat 0 (0)
203 v8 = mad_f32 v6 v7 v3
204 v9 = splat 3E59B3D0 (0.21259999)
205 v10 = splat 3F371759 (0.71520001)
206 v11 = splat 3D93DD98 (0.0722)
207 v12 = mul_f32 v8 v11
208 v13 = mad_f32 v8 v10 v12
209 v14 = mad_f32 v8 v9 v13
210 v15 = splat 437F0000 (255)
211 v16 = mul_f32 v14 v15
212 v17 = round v16
213 store8 arg(1) v17
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600214
Mike Klein5cdeb392020-02-10 12:10:36 -06002158 registers, 19 instructions:
2160 r0 = splat 3B808081 (0.0039215689)
2171 r1 = splat 0 (0)
2182 r2 = splat 3E59B3D0 (0.21259999)
2193 r3 = splat 3F371759 (0.71520001)
2204 r4 = splat 3D93DD98 (0.0722)
2215 r5 = splat 437F0000 (255)
222loop:
2236 r6 = load8 arg(0)
2247 r6 = to_f32 r6
2258 r6 = mul_f32 r0 r6
2269 r7 = load8 arg(1)
22710 r7 = to_f32 r7
22811 r7 = mul_f32 r0 r7
22912 r6 = mad_f32 r7 r1 r6
23013 r7 = mul_f32 r6 r4
23114 r7 = mad_f32 r6 r3 r7
23215 r7 = mad_f32 r6 r2 r7
23316 r7 = mul_f32 r7 r5
23417 r7 = round r7
23518 store8 arg(1) r7
236
237G8 over RGBA_8888
23839 values (originally 39):
239 v0 = splat 3B808081 (0.0039215689)
240 v1 = load8 arg(0)
241 v2 = to_f32 v1
242 v3 = mul_f32 v0 v2
243 v4 = load32 arg(1)
244 v5 = splat FF (3.5733111e-43)
245 v6 = bit_and v5 v4
246 v7 = to_f32 v6
247 v8 = mul_f32 v0 v7
248 v9 = splat 0 (0)
249 v10 = mad_f32 v8 v9 v3
250 v11 = splat 437F0000 (255)
251 v12 = mul_f32 v10 v11
252 v13 = round v12
253 v14 = shr_i32 v4 8
254 v15 = bit_and v5 v14
255 v16 = to_f32 v15
256 v17 = mul_f32 v0 v16
257 v18 = mad_f32 v17 v9 v3
258 v19 = mul_f32 v18 v11
259 v20 = round v19
260 v21 = pack v13 v20 8
261 v22 = shr_i32 v4 16
262 v23 = bit_and v5 v22
263 v24 = to_f32 v23
264 v25 = mul_f32 v0 v24
265 v26 = mad_f32 v25 v9 v3
266 v27 = mul_f32 v26 v11
267 v28 = round v27
268 v29 = splat 3F800000 (1)
269 v30 = shr_i32 v4 24
270 v31 = to_f32 v30
271 v32 = mul_f32 v0 v31
272 v33 = mad_f32 v32 v9 v29
273 v34 = mul_f32 v33 v11
274 v35 = round v34
275 v36 = pack v28 v35 8
276 v37 = pack v21 v36 16
277 store32 arg(1) v37
278
2799 registers, 39 instructions:
2800 r0 = splat 3B808081 (0.0039215689)
2811 r1 = splat FF (3.5733111e-43)
2822 r2 = splat 0 (0)
2833 r3 = splat 437F0000 (255)
2844 r4 = splat 3F800000 (1)
285loop:
2865 r5 = load8 arg(0)
2876 r5 = to_f32 r5
2887 r5 = mul_f32 r0 r5
2898 r6 = load32 arg(1)
2909 r7 = bit_and r1 r6
29110 r7 = to_f32 r7
29211 r7 = mul_f32 r0 r7
29312 r7 = mad_f32 r7 r2 r5
29413 r7 = mul_f32 r7 r3
29514 r7 = round r7
29615 r8 = shr_i32 r6 8
29716 r8 = bit_and r1 r8
29817 r8 = to_f32 r8
29918 r8 = mul_f32 r0 r8
30019 r8 = mad_f32 r8 r2 r5
30120 r8 = mul_f32 r8 r3
30221 r8 = round r8
30322 r8 = pack r7 r8 8
30423 r7 = shr_i32 r6 16
30524 r7 = bit_and r1 r7
30625 r7 = to_f32 r7
30726 r7 = mul_f32 r0 r7
30827 r5 = mad_f32 r7 r2 r5
30928 r5 = mul_f32 r5 r3
31029 r5 = round r5
31130 r6 = shr_i32 r6 24
31231 r6 = to_f32 r6
31332 r6 = mul_f32 r0 r6
31433 r6 = mad_f32 r6 r2 r4
31534 r6 = mul_f32 r6 r3
31635 r6 = round r6
31736 r6 = pack r5 r6 8
31837 r6 = pack r8 r6 16
31938 store32 arg(1) r6
320
321RGBA_8888 over A8
32215 values (originally 31):
323 v0 = splat 3B808081 (0.0039215689)
324 v1 = load32 arg(0)
325 v2 = shr_i32 v1 24
326 v3 = to_f32 v2
327 v4 = mul_f32 v0 v3
328 v5 = load8 arg(1)
329 v6 = to_f32 v5
330 v7 = mul_f32 v0 v6
331 v8 = splat 3F800000 (1)
332 v9 = sub_f32 v8 v4
333 v10 = mad_f32 v7 v9 v4
334 v11 = splat 437F0000 (255)
335 v12 = mul_f32 v10 v11
336 v13 = round v12
337 store8 arg(1) v13
338
3396 registers, 15 instructions:
3400 r0 = splat 3B808081 (0.0039215689)
3411 r1 = splat 3F800000 (1)
3422 r2 = splat 437F0000 (255)
Mike Kleina6307322019-06-07 15:44:26 -0500343loop:
Mike Kleina6434a52020-01-08 14:06:52 -06003443 r3 = load32 arg(0)
Mike Klein5cdeb392020-02-10 12:10:36 -06003454 r3 = shr_i32 r3 24
3465 r3 = to_f32 r3
3476 r3 = mul_f32 r0 r3
3487 r4 = load8 arg(1)
3498 r4 = to_f32 r4
3509 r4 = mul_f32 r0 r4
35110 r5 = sub_f32 r1 r3
35211 r3 = mad_f32 r4 r5 r3
35312 r3 = mul_f32 r3 r2
35413 r3 = round r3
35514 store8 arg(1) r3
356
357RGBA_8888 over G8
35835 values (originally 36):
359 v0 = splat 3B808081 (0.0039215689)
360 v1 = load32 arg(0)
361 v2 = splat FF (3.5733111e-43)
362 v3 = bit_and v2 v1
363 v4 = to_f32 v3
364 v5 = mul_f32 v0 v4
365 v6 = load8 arg(1)
366 v7 = to_f32 v6
367 v8 = mul_f32 v0 v7
368 v9 = shr_i32 v1 24
369 v10 = to_f32 v9
370 v11 = mul_f32 v0 v10
371 v12 = splat 3F800000 (1)
372 v13 = sub_f32 v12 v11
373 v14 = mad_f32 v8 v13 v5
374 v15 = splat 3E59B3D0 (0.21259999)
375 v16 = shr_i32 v1 8
376 v17 = bit_and v2 v16
377 v18 = to_f32 v17
378 v19 = mul_f32 v0 v18
379 v20 = mad_f32 v8 v13 v19
380 v21 = splat 3F371759 (0.71520001)
381 v22 = shr_i32 v1 16
382 v23 = bit_and v2 v22
383 v24 = to_f32 v23
384 v25 = mul_f32 v0 v24
385 v26 = mad_f32 v8 v13 v25
386 v27 = splat 3D93DD98 (0.0722)
387 v28 = mul_f32 v26 v27
388 v29 = mad_f32 v20 v21 v28
389 v30 = mad_f32 v14 v15 v29
390 v31 = splat 437F0000 (255)
391 v32 = mul_f32 v30 v31
392 v33 = round v32
393 store8 arg(1) v33
394
39512 registers, 35 instructions:
3960 r0 = splat 3B808081 (0.0039215689)
3971 r1 = splat FF (3.5733111e-43)
3982 r2 = splat 3F800000 (1)
3993 r3 = splat 3E59B3D0 (0.21259999)
4004 r4 = splat 3F371759 (0.71520001)
4015 r5 = splat 3D93DD98 (0.0722)
4026 r6 = splat 437F0000 (255)
403loop:
4047 r7 = load32 arg(0)
4058 r8 = bit_and r1 r7
4069 r8 = to_f32 r8
40710 r8 = mul_f32 r0 r8
40811 r9 = load8 arg(1)
40912 r9 = to_f32 r9
41013 r9 = mul_f32 r0 r9
41114 r10 = shr_i32 r7 24
41215 r10 = to_f32 r10
41316 r10 = mul_f32 r0 r10
41417 r10 = sub_f32 r2 r10
41518 r8 = mad_f32 r9 r10 r8
41619 r11 = shr_i32 r7 8
41720 r11 = bit_and r1 r11
41821 r11 = to_f32 r11
41922 r11 = mul_f32 r0 r11
42023 r11 = mad_f32 r9 r10 r11
42124 r7 = shr_i32 r7 16
42225 r7 = bit_and r1 r7
42326 r7 = to_f32 r7
42427 r7 = mul_f32 r0 r7
42528 r7 = mad_f32 r9 r10 r7
42629 r7 = mul_f32 r7 r5
42730 r7 = mad_f32 r11 r4 r7
42831 r7 = mad_f32 r8 r3 r7
42932 r7 = mul_f32 r7 r6
43033 r7 = round r7
43134 store8 arg(1) r7
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600432
433RGBA_8888 over RGBA_8888
Mike Klein5cdeb392020-02-10 12:10:36 -060043451 values (originally 51):
435 v0 = splat 3B808081 (0.0039215689)
436 v1 = load32 arg(0)
437 v2 = splat FF (3.5733111e-43)
438 v3 = bit_and v2 v1
439 v4 = to_f32 v3
440 v5 = mul_f32 v0 v4
441 v6 = load32 arg(1)
442 v7 = bit_and v2 v6
443 v8 = to_f32 v7
444 v9 = mul_f32 v0 v8
445 v10 = shr_i32 v1 24
446 v11 = to_f32 v10
447 v12 = mul_f32 v0 v11
448 v13 = splat 3F800000 (1)
449 v14 = sub_f32 v13 v12
450 v15 = mad_f32 v9 v14 v5
451 v16 = splat 437F0000 (255)
452 v17 = mul_f32 v15 v16
453 v18 = round v17
454 v19 = shr_i32 v1 8
455 v20 = bit_and v2 v19
456 v21 = to_f32 v20
457 v22 = mul_f32 v0 v21
458 v23 = shr_i32 v6 8
459 v24 = bit_and v2 v23
460 v25 = to_f32 v24
461 v26 = mul_f32 v0 v25
462 v27 = mad_f32 v26 v14 v22
463 v28 = mul_f32 v27 v16
464 v29 = round v28
465 v30 = pack v18 v29 8
466 v31 = shr_i32 v1 16
467 v32 = bit_and v2 v31
468 v33 = to_f32 v32
469 v34 = mul_f32 v0 v33
470 v35 = shr_i32 v6 16
471 v36 = bit_and v2 v35
472 v37 = to_f32 v36
473 v38 = mul_f32 v0 v37
474 v39 = mad_f32 v38 v14 v34
475 v40 = mul_f32 v39 v16
476 v41 = round v40
477 v42 = shr_i32 v6 24
478 v43 = to_f32 v42
479 v44 = mul_f32 v0 v43
480 v45 = mad_f32 v44 v14 v12
481 v46 = mul_f32 v45 v16
482 v47 = round v46
483 v48 = pack v41 v47 8
484 v49 = pack v30 v48 16
485 store32 arg(1) v49
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600486
Mike Klein5cdeb392020-02-10 12:10:36 -060048711 registers, 51 instructions:
4880 r0 = splat 3B808081 (0.0039215689)
4891 r1 = splat FF (3.5733111e-43)
4902 r2 = splat 3F800000 (1)
4913 r3 = splat 437F0000 (255)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600492loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06004934 r4 = load32 arg(0)
4945 r5 = bit_and r1 r4
4956 r5 = to_f32 r5
4967 r5 = mul_f32 r0 r5
4978 r6 = load32 arg(1)
4989 r7 = bit_and r1 r6
49910 r7 = to_f32 r7
50011 r7 = mul_f32 r0 r7
50112 r8 = shr_i32 r4 24
50213 r8 = to_f32 r8
50314 r8 = mul_f32 r0 r8
50415 r9 = sub_f32 r2 r8
50516 r5 = mad_f32 r7 r9 r5
50617 r5 = mul_f32 r5 r3
50718 r5 = round r5
50819 r7 = shr_i32 r4 8
50920 r7 = bit_and r1 r7
51021 r7 = to_f32 r7
51122 r7 = mul_f32 r0 r7
51223 r10 = shr_i32 r6 8
51324 r10 = bit_and r1 r10
51425 r10 = to_f32 r10
51526 r10 = mul_f32 r0 r10
51627 r7 = mad_f32 r10 r9 r7
51728 r7 = mul_f32 r7 r3
51829 r7 = round r7
51930 r7 = pack r5 r7 8
52031 r4 = shr_i32 r4 16
52132 r4 = bit_and r1 r4
52233 r4 = to_f32 r4
52334 r4 = mul_f32 r0 r4
52435 r5 = shr_i32 r6 16
52536 r5 = bit_and r1 r5
52637 r5 = to_f32 r5
52738 r5 = mul_f32 r0 r5
52839 r4 = mad_f32 r5 r9 r4
52940 r4 = mul_f32 r4 r3
53041 r4 = round r4
53142 r6 = shr_i32 r6 24
53243 r6 = to_f32 r6
53344 r6 = mul_f32 r0 r6
53445 r8 = mad_f32 r6 r9 r8
53546 r8 = mul_f32 r8 r3
53647 r8 = round r8
53748 r8 = pack r4 r8 8
53849 r8 = pack r7 r8 16
53950 store32 arg(1) r8
Mike Klein267f5072019-06-03 16:27:46 -0500540
Mike Klein397fc882019-06-20 11:37:10 -0500541I32 (Naive) 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060054233 values (originally 33):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500543 v0 = load32 arg(0)
Mike Klein5cdeb392020-02-10 12:10:36 -0600544 v1 = splat FF (3.5733111e-43)
545 v2 = bit_and v1 v0
546 v3 = load32 arg(1)
547 v4 = bit_and v1 v3
548 v5 = shr_i32 v0 24
549 v6 = splat 100 (3.5873241e-43)
550 v7 = sub_i32 v6 v5
551 v8 = mul_i32 v4 v7
552 v9 = shr_i32 v8 8
553 v10 = add_i32 v2 v9
554 v11 = shr_i32 v0 8
555 v12 = bit_and v1 v11
556 v13 = shr_i32 v3 8
557 v14 = bit_and v1 v13
558 v15 = mul_i32 v14 v7
559 v16 = shr_i32 v15 8
560 v17 = add_i32 v12 v16
561 v18 = pack v10 v17 8
562 v19 = shr_i32 v0 16
563 v20 = bit_and v1 v19
564 v21 = shr_i32 v3 16
565 v22 = bit_and v1 v21
566 v23 = mul_i32 v22 v7
567 v24 = shr_i32 v23 8
568 v25 = add_i32 v20 v24
569 v26 = shr_i32 v3 24
570 v27 = mul_i32 v26 v7
571 v28 = shr_i32 v27 8
572 v29 = add_i32 v5 v28
573 v30 = pack v25 v29 8
574 v31 = pack v18 v30 16
575 store32 arg(1) v31
Mike Kleinaab45b52019-07-02 15:39:23 -0500576
Mike Klein5cdeb392020-02-10 12:10:36 -06005779 registers, 33 instructions:
5780 r0 = splat FF (3.5733111e-43)
5791 r1 = splat 100 (3.5873241e-43)
Mike Klein397fc882019-06-20 11:37:10 -0500580loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06005812 r2 = load32 arg(0)
5823 r3 = bit_and r0 r2
5834 r4 = load32 arg(1)
5845 r5 = bit_and r0 r4
5856 r6 = shr_i32 r2 24
5867 r7 = sub_i32 r1 r6
5878 r5 = mul_i32 r5 r7
5889 r5 = shr_i32 r5 8
58910 r5 = add_i32 r3 r5
59011 r3 = shr_i32 r2 8
59112 r3 = bit_and r0 r3
59213 r8 = shr_i32 r4 8
59314 r8 = bit_and r0 r8
59415 r8 = mul_i32 r8 r7
59516 r8 = shr_i32 r8 8
59617 r8 = add_i32 r3 r8
59718 r8 = pack r5 r8 8
59819 r2 = shr_i32 r2 16
59920 r2 = bit_and r0 r2
60021 r5 = shr_i32 r4 16
60122 r5 = bit_and r0 r5
60223 r5 = mul_i32 r5 r7
60324 r5 = shr_i32 r5 8
60425 r5 = add_i32 r2 r5
60526 r4 = shr_i32 r4 24
60627 r7 = mul_i32 r4 r7
60728 r7 = shr_i32 r7 8
60829 r7 = add_i32 r6 r7
60930 r7 = pack r5 r7 8
61031 r7 = pack r8 r7 16
61132 store32 arg(1) r7
Mike Klein397fc882019-06-20 11:37:10 -0500612
Mike Klein7b7077c2019-06-03 17:10:59 -0500613I32 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060061429 values (originally 29):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500615 v0 = load32 arg(0)
Mike Klein5cdeb392020-02-10 12:10:36 -0600616 v1 = splat FF (3.5733111e-43)
617 v2 = bit_and v0 v1
618 v3 = load32 arg(1)
619 v4 = bit_and v3 v1
620 v5 = shr_i32 v0 24
621 v6 = splat 100 (3.5873241e-43)
622 v7 = sub_i32 v6 v5
623 v8 = mul_i16x2 v4 v7
624 v9 = shr_i32 v8 8
625 v10 = add_i32 v2 v9
626 v11 = bytes v0 2
627 v12 = bytes v3 2
628 v13 = mul_i16x2 v12 v7
629 v14 = shr_i32 v13 8
630 v15 = add_i32 v11 v14
631 v16 = pack v10 v15 8
632 v17 = bytes v0 3
633 v18 = bytes v3 3
634 v19 = mul_i16x2 v18 v7
635 v20 = shr_i32 v19 8
636 v21 = add_i32 v17 v20
637 v22 = shr_i32 v3 24
638 v23 = mul_i16x2 v22 v7
639 v24 = shr_i32 v23 8
640 v25 = add_i32 v5 v24
641 v26 = pack v21 v25 8
642 v27 = pack v16 v26 16
643 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500644
Mike Klein5cdeb392020-02-10 12:10:36 -06006459 registers, 29 instructions:
6460 r0 = splat FF (3.5733111e-43)
6471 r1 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500648loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06006492 r2 = load32 arg(0)
6503 r3 = bit_and r2 r0
6514 r4 = load32 arg(1)
6525 r5 = bit_and r4 r0
6536 r6 = shr_i32 r2 24
6547 r7 = sub_i32 r1 r6
6558 r5 = mul_i16x2 r5 r7
6569 r5 = shr_i32 r5 8
65710 r5 = add_i32 r3 r5
65811 r3 = bytes r2 2
65912 r8 = bytes r4 2
66013 r8 = mul_i16x2 r8 r7
66114 r8 = shr_i32 r8 8
66215 r8 = add_i32 r3 r8
66316 r8 = pack r5 r8 8
66417 r2 = bytes r2 3
66518 r5 = bytes r4 3
66619 r5 = mul_i16x2 r5 r7
66720 r5 = shr_i32 r5 8
66821 r5 = add_i32 r2 r5
66922 r4 = shr_i32 r4 24
67023 r7 = mul_i16x2 r4 r7
67124 r7 = shr_i32 r7 8
67225 r7 = add_i32 r6 r7
67326 r7 = pack r5 r7 8
67427 r7 = pack r8 r7 16
67528 store32 arg(1) r7
Mike Klein821f5e82019-06-13 10:56:51 -0500676
677I32 (SWAR) 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060067815 values (originally 15):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500679 v0 = load32 arg(0)
680 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500681 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500682 v3 = sub_i16x2 v2 v1
683 v4 = load32 arg(1)
Mike Klein5cdeb392020-02-10 12:10:36 -0600684 v5 = splat FF00FF (2.3418409e-38)
685 v6 = bit_and v4 v5
686 v7 = mul_i16x2 v6 v3
687 v8 = shr_i16x2 v7 8
688 v9 = shr_i16x2 v4 8
689 v10 = mul_i16x2 v9 v3
690 v11 = bit_clear v10 v5
691 v12 = bit_or v8 v11
692 v13 = add_i32 v0 v12
693 store32 arg(1) v13
Mike Kleinaab45b52019-07-02 15:39:23 -0500694
Mike Klein5cdeb392020-02-10 12:10:36 -06006956 registers, 15 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006960 r0 = splat 1000100 (2.3510604e-38)
Mike Klein5cdeb392020-02-10 12:10:36 -06006971 r1 = splat FF00FF (2.3418409e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500698loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06006992 r2 = load32 arg(0)
7003 r3 = bytes r2 404
7014 r3 = sub_i16x2 r0 r3
7025 r4 = load32 arg(1)
7036 r5 = bit_and r4 r1
7047 r5 = mul_i16x2 r5 r3
7058 r5 = shr_i16x2 r5 8
7069 r4 = shr_i16x2 r4 8
70710 r3 = mul_i16x2 r4 r3
70811 r3 = bit_clear r3 r1
70912 r3 = bit_or r5 r3
71013 r3 = add_i32 r2 r3
71114 store32 arg(1) r3
Mike Klein7b7077c2019-06-03 17:10:59 -0500712
Mike Kleined9b1f12020-02-06 13:02:32 -06007136 values (originally 6):
Mike Klein0f61c122019-10-16 10:46:01 -0500714 v0 = splat 1 (1.4012985e-45)
715 v1 = splat 2 (2.8025969e-45)
Mike Kleinf9963112019-08-08 15:13:25 -0400716 v2 = add_i32 v0 v1
717 v3 = load32 arg(0)
718 v4 = mul_i32 v3 v2
719 store32 arg(0) v4
720
7212 registers, 6 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06007220 r0 = splat 1 (1.4012985e-45)
7231 r1 = splat 2 (2.8025969e-45)
7242 r1 = add_i32 r0 r1
Mike Kleinf9963112019-08-08 15:13:25 -0400725loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06007263 r0 = load32 arg(0)
7274 r0 = mul_i32 r0 r1
7285 store32 arg(0) r0
Mike Kleinf9963112019-08-08 15:13:25 -0400729
Mike Klein5cdeb392020-02-10 12:10:36 -060073023 values (originally 23):
731 v0 = splat FF (3.5733111e-43)
732 v1 = load32 arg(0)
733 v2 = bit_and v0 v1
734 v3 = load32 arg(1)
735 v4 = bit_and v0 v3
736 v5 = add_i32 v2 v4
737 v6 = shr_i32 v1 8
738 v7 = bit_and v0 v6
739 v8 = shr_i32 v3 8
740 v9 = bit_and v0 v8
741 v10 = add_i32 v7 v9
742 v11 = pack v5 v10 8
743 v12 = shr_i32 v1 16
744 v13 = bit_and v0 v12
745 v14 = shr_i32 v3 16
746 v15 = bit_and v0 v14
747 v16 = add_i32 v13 v15
748 v17 = shr_i32 v1 24
749 v18 = shr_i32 v3 24
750 v19 = add_i32 v17 v18
751 v20 = pack v16 v19 8
752 v21 = pack v11 v20 16
753 store32 arg(1) v21
Mike Kleind48488b2019-10-22 12:27:58 -0500754
Mike Klein5cdeb392020-02-10 12:10:36 -06007556 registers, 23 instructions:
7560 r0 = splat FF (3.5733111e-43)
Mike Kleind48488b2019-10-22 12:27:58 -0500757loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06007581 r1 = load32 arg(0)
7592 r2 = bit_and r0 r1
7603 r3 = load32 arg(1)
7614 r4 = bit_and r0 r3
7625 r4 = add_i32 r2 r4
7636 r2 = shr_i32 r1 8
7647 r2 = bit_and r0 r2
7658 r5 = shr_i32 r3 8
7669 r5 = bit_and r0 r5
76710 r5 = add_i32 r2 r5
76811 r5 = pack r4 r5 8
76912 r4 = shr_i32 r1 16
77013 r4 = bit_and r0 r4
77114 r2 = shr_i32 r3 16
77215 r2 = bit_and r0 r2
77316 r2 = add_i32 r4 r2
77417 r1 = shr_i32 r1 24
77518 r3 = shr_i32 r3 24
77619 r3 = add_i32 r1 r3
77720 r3 = pack r2 r3 8
77821 r3 = pack r5 r3 16
77922 store32 arg(1) r3
Mike Kleind48488b2019-10-22 12:27:58 -0500780