blob: bb98672a10dc65a1172201371cadf2921c3c2633 [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600212 values:
3 v0 = load8 arg(0)
4 v1 = to_f32 v0
5 v2 = mul_f32 v1 3B808081 (0.0039215689)
6 v3 = load8 arg(1)
7 v4 = to_f32 v3
8 v5 = mul_f32 v4 3B808081 (0.0039215689)
9 v6 = splat 3F800000 (1)
10 v7 = sub_f32 v6 v2
11 v8 = mad_f32 v5 v7 v2
12 v9 = mul_f32 v8 437F0000 (255)
13 v10 = round v9
14 store8 arg(1) v10
Mike Klein22ea7e92019-06-10 12:05:48 -050015
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600164 registers, 12 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -0600170 r0 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -050018loop:
Mike Kleinb5c43552020-01-07 11:39:30 -0600191 r1 = load8 arg(0)
202 r1 = to_f32 r1
213 r1 = mul_f32 r1 3B808081 (0.0039215689)
224 r2 = load8 arg(1)
235 r2 = to_f32 r2
246 r2 = mul_f32 r2 3B808081 (0.0039215689)
257 r3 = sub_f32 r0 r1
268 r1 = mad_f32 r2 r3 r1
279 r1 = mul_f32 r1 437F0000 (255)
2810 r1 = round r1
2911 store8 arg(1) r1
Mike Klein8c1e0ef2019-11-12 09:07:23 -060030
31A8 over G8
3217 values:
33 v0 = load8 arg(1)
34 v1 = to_f32 v0
35 v2 = mul_f32 v1 3B808081 (0.0039215689)
36 v3 = load8 arg(0)
37 v4 = to_f32 v3
38 v5 = mul_f32 v4 3B808081 (0.0039215689)
39 v6 = splat 3F800000 (1)
40 v7 = sub_f32 v6 v5
41 v8 = mul_f32 v2 v7
42 v9 = splat 3E59B3D0 (0.21259999)
43 v10 = splat 3F371759 (0.71520001)
44 v11 = mul_f32 v8 3D93DD98 (0.0722)
45 v12 = mad_f32 v8 v10 v11
46 v13 = mad_f32 v8 v9 v12
47 v14 = mul_f32 v13 437F0000 (255)
48 v15 = round v14
49 store8 arg(1) v15
50
515 registers, 17 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -0600520 r0 = splat 3F800000 (1)
531 r1 = splat 3E59B3D0 (0.21259999)
542 r2 = splat 3F371759 (0.71520001)
Mike Klein8c1e0ef2019-11-12 09:07:23 -060055loop:
Mike Kleinb5c43552020-01-07 11:39:30 -0600563 r3 = load8 arg(1)
574 r3 = to_f32 r3
585 r3 = mul_f32 r3 3B808081 (0.0039215689)
596 r4 = load8 arg(0)
607 r4 = to_f32 r4
618 r4 = mul_f32 r4 3B808081 (0.0039215689)
629 r4 = sub_f32 r0 r4
6310 r4 = mul_f32 r3 r4
6411 r3 = mul_f32 r4 3D93DD98 (0.0722)
6512 r3 = mad_f32 r4 r2 r3
6613 r3 = mad_f32 r4 r1 r3
6714 r3 = mul_f32 r3 437F0000 (255)
6815 r3 = round r3
6916 store8 arg(1) r3
Mike Klein267f5072019-06-03 16:27:46 -050070
Mike Klein754bad32019-06-05 10:47:46 -050071A8 over RGBA_8888
Mike Kleina6434a52020-01-08 14:06:52 -06007237 values:
Mike Klein8c1e0ef2019-11-12 09:07:23 -060073 v0 = load32 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -060074 v1 = bit_and v0 FF
75 v2 = to_f32 v1
76 v3 = mul_f32 v2 3B808081 (0.0039215689)
77 v4 = load8 arg(0)
78 v5 = to_f32 v4
79 v6 = mul_f32 v5 3B808081 (0.0039215689)
80 v7 = splat 3F800000 (1)
81 v8 = sub_f32 v7 v6
82 v9 = mul_f32 v3 v8
83 v10 = mul_f32 v9 437F0000 (255)
84 v11 = round v10
85 v12 = shr_i32 v0 8
86 v13 = bit_and v12 FF
Mike Klein8c1e0ef2019-11-12 09:07:23 -060087 v14 = to_f32 v13
88 v15 = mul_f32 v14 3B808081 (0.0039215689)
Mike Kleina6434a52020-01-08 14:06:52 -060089 v16 = mul_f32 v15 v8
Mike Klein8c1e0ef2019-11-12 09:07:23 -060090 v17 = mul_f32 v16 437F0000 (255)
91 v18 = round v17
Mike Kleina6434a52020-01-08 14:06:52 -060092 v19 = pack v11 v18 8
93 v20 = shr_i32 v0 16
94 v21 = bit_and v20 FF
95 v22 = to_f32 v21
96 v23 = mul_f32 v22 3B808081 (0.0039215689)
97 v24 = mul_f32 v23 v8
98 v25 = mul_f32 v24 437F0000 (255)
99 v26 = round v25
100 v27 = shr_i32 v0 24
101 v28 = bit_and v27 FF
102 v29 = to_f32 v28
103 v30 = mul_f32 v29 3B808081 (0.0039215689)
104 v31 = mad_f32 v30 v8 v6
105 v32 = mul_f32 v31 437F0000 (255)
106 v33 = round v32
107 v34 = pack v26 v33 8
108 v35 = pack v19 v34 16
109 store32 arg(1) v35
Mike Klein22ea7e92019-06-10 12:05:48 -0500110
Mike Kleina6434a52020-01-08 14:06:52 -06001116 registers, 37 instructions:
1120 r0 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -0500113loop:
Mike Kleina6434a52020-01-08 14:06:52 -06001141 r1 = load32 arg(1)
1152 r2 = bit_and r1 FF
1163 r2 = to_f32 r2
1174 r2 = mul_f32 r2 3B808081 (0.0039215689)
1185 r3 = load8 arg(0)
1196 r3 = to_f32 r3
1207 r3 = mul_f32 r3 3B808081 (0.0039215689)
1218 r4 = sub_f32 r0 r3
1229 r2 = mul_f32 r2 r4
12310 r2 = mul_f32 r2 437F0000 (255)
12411 r2 = round r2
12512 r5 = shr_i32 r1 8
12613 r5 = bit_and r5 FF
12714 r5 = to_f32 r5
12815 r5 = mul_f32 r5 3B808081 (0.0039215689)
12916 r5 = mul_f32 r5 r4
13017 r5 = mul_f32 r5 437F0000 (255)
13118 r5 = round r5
13219 r5 = pack r2 r5 8
13320 r2 = shr_i32 r1 16
13421 r2 = bit_and r2 FF
13522 r2 = to_f32 r2
13623 r2 = mul_f32 r2 3B808081 (0.0039215689)
13724 r2 = mul_f32 r2 r4
13825 r2 = mul_f32 r2 437F0000 (255)
13926 r2 = round r2
14027 r1 = shr_i32 r1 24
14128 r1 = bit_and r1 FF
14229 r1 = to_f32 r1
14330 r1 = mul_f32 r1 3B808081 (0.0039215689)
14431 r3 = mad_f32 r1 r4 r3
14532 r3 = mul_f32 r3 437F0000 (255)
14633 r3 = round r3
14734 r3 = pack r2 r3 8
14835 r3 = pack r5 r3 16
14936 store32 arg(1) r3
Mike Klein754bad32019-06-05 10:47:46 -0500150
151G8 over A8
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001529 values:
Mike Kleind48488b2019-10-22 12:27:58 -0500153 v0 = splat 3F800000 (1)
Mike Klein1cb05992019-11-21 11:46:58 -0600154 v1 = splat 0 (0)
155 v2 = load8 arg(1)
156 v3 = to_f32 v2
157 v4 = mul_f32 v3 3B808081 (0.0039215689)
158 v5 = mad_f32 v4 v1 v0
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600159 v6 = mul_f32 v5 437F0000 (255)
160 v7 = round v6
161 store8 arg(1) v7
Mike Klein22ea7e92019-06-10 12:05:48 -0500162
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001633 registers, 9 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001640 r0 = splat 3F800000 (1)
1651 r1 = splat 0 (0)
Mike Klein754bad32019-06-05 10:47:46 -0500166loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001672 r2 = load8 arg(1)
1683 r2 = to_f32 r2
1694 r2 = mul_f32 r2 3B808081 (0.0039215689)
1705 r2 = mad_f32 r2 r1 r0
1716 r2 = mul_f32 r2 437F0000 (255)
1727 r2 = round r2
1738 store8 arg(1) r2
Mike Klein754bad32019-06-05 10:47:46 -0500174
175G8 over G8
Mike Klein1cb05992019-11-21 11:46:58 -060017616 values:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600177 v0 = load8 arg(0)
178 v1 = to_f32 v0
179 v2 = mul_f32 v1 3B808081 (0.0039215689)
180 v3 = load8 arg(1)
Mike Kleind48488b2019-10-22 12:27:58 -0500181 v4 = to_f32 v3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600182 v5 = mul_f32 v4 3B808081 (0.0039215689)
Mike Klein1cb05992019-11-21 11:46:58 -0600183 v6 = splat 0 (0)
184 v7 = mad_f32 v5 v6 v2
185 v8 = splat 3E59B3D0 (0.21259999)
186 v9 = splat 3F371759 (0.71520001)
187 v10 = mul_f32 v7 3D93DD98 (0.0722)
188 v11 = mad_f32 v7 v9 v10
189 v12 = mad_f32 v7 v8 v11
190 v13 = mul_f32 v12 437F0000 (255)
191 v14 = round v13
192 store8 arg(1) v14
Mike Klein22ea7e92019-06-10 12:05:48 -0500193
Mike Klein1cb05992019-11-21 11:46:58 -06001945 registers, 16 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001950 r0 = splat 0 (0)
1961 r1 = splat 3E59B3D0 (0.21259999)
1972 r2 = splat 3F371759 (0.71520001)
Mike Klein754bad32019-06-05 10:47:46 -0500198loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001993 r3 = load8 arg(0)
2004 r3 = to_f32 r3
2015 r3 = mul_f32 r3 3B808081 (0.0039215689)
2026 r4 = load8 arg(1)
2037 r4 = to_f32 r4
2048 r4 = mul_f32 r4 3B808081 (0.0039215689)
2059 r3 = mad_f32 r4 r0 r3
20610 r4 = mul_f32 r3 3D93DD98 (0.0722)
20711 r4 = mad_f32 r3 r2 r4
20812 r4 = mad_f32 r3 r1 r4
20913 r4 = mul_f32 r4 437F0000 (255)
21014 r4 = round r4
21115 store8 arg(1) r4
Mike Klein754bad32019-06-05 10:47:46 -0500212
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600213G8 over RGBA_8888
Mike Kleina6434a52020-01-08 14:06:52 -060021437 values:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600215 v0 = load8 arg(0)
216 v1 = to_f32 v0
217 v2 = mul_f32 v1 3B808081 (0.0039215689)
218 v3 = load32 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -0600219 v4 = bit_and v3 FF
220 v5 = to_f32 v4
221 v6 = mul_f32 v5 3B808081 (0.0039215689)
222 v7 = splat 0 (0)
223 v8 = mad_f32 v6 v7 v2
224 v9 = mul_f32 v8 437F0000 (255)
225 v10 = round v9
226 v11 = shr_i32 v3 8
227 v12 = bit_and v11 FF
Mike Klein1cb05992019-11-21 11:46:58 -0600228 v13 = to_f32 v12
229 v14 = mul_f32 v13 3B808081 (0.0039215689)
Mike Kleina6434a52020-01-08 14:06:52 -0600230 v15 = mad_f32 v14 v7 v2
Mike Klein1cb05992019-11-21 11:46:58 -0600231 v16 = mul_f32 v15 437F0000 (255)
232 v17 = round v16
Mike Kleina6434a52020-01-08 14:06:52 -0600233 v18 = pack v10 v17 8
234 v19 = shr_i32 v3 16
235 v20 = bit_and v19 FF
236 v21 = to_f32 v20
237 v22 = mul_f32 v21 3B808081 (0.0039215689)
238 v23 = mad_f32 v22 v7 v2
239 v24 = mul_f32 v23 437F0000 (255)
240 v25 = round v24
241 v26 = splat 3F800000 (1)
242 v27 = shr_i32 v3 24
243 v28 = bit_and v27 FF
244 v29 = to_f32 v28
245 v30 = mul_f32 v29 3B808081 (0.0039215689)
246 v31 = mad_f32 v30 v7 v26
247 v32 = mul_f32 v31 437F0000 (255)
248 v33 = round v32
249 v34 = pack v25 v33 8
250 v35 = pack v18 v34 16
251 store32 arg(1) v35
Mike Klein22ea7e92019-06-10 12:05:48 -0500252
Mike Kleina6434a52020-01-08 14:06:52 -06002536 registers, 37 instructions:
2540 r0 = splat 0 (0)
2551 r1 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600256loop:
Mike Kleina6434a52020-01-08 14:06:52 -06002572 r2 = load8 arg(0)
2583 r2 = to_f32 r2
2594 r2 = mul_f32 r2 3B808081 (0.0039215689)
2605 r3 = load32 arg(1)
2616 r4 = bit_and r3 FF
2627 r4 = to_f32 r4
2638 r4 = mul_f32 r4 3B808081 (0.0039215689)
2649 r4 = mad_f32 r4 r0 r2
26510 r4 = mul_f32 r4 437F0000 (255)
26611 r4 = round r4
26712 r5 = shr_i32 r3 8
26813 r5 = bit_and r5 FF
26914 r5 = to_f32 r5
27015 r5 = mul_f32 r5 3B808081 (0.0039215689)
27116 r5 = mad_f32 r5 r0 r2
27217 r5 = mul_f32 r5 437F0000 (255)
27318 r5 = round r5
27419 r5 = pack r4 r5 8
27520 r4 = shr_i32 r3 16
27621 r4 = bit_and r4 FF
27722 r4 = to_f32 r4
27823 r4 = mul_f32 r4 3B808081 (0.0039215689)
27924 r2 = mad_f32 r4 r0 r2
28025 r2 = mul_f32 r2 437F0000 (255)
28126 r2 = round r2
28227 r3 = shr_i32 r3 24
28328 r3 = bit_and r3 FF
28429 r3 = to_f32 r3
28530 r3 = mul_f32 r3 3B808081 (0.0039215689)
28631 r3 = mad_f32 r3 r0 r1
28732 r3 = mul_f32 r3 437F0000 (255)
28833 r3 = round r3
28934 r3 = pack r2 r3 8
29035 r3 = pack r5 r3 16
29136 store32 arg(1) r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600292
293RGBA_8888 over A8
29414 values:
295 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600296 v1 = shr_i32 v0 24
297 v2 = bit_and v1 FF
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600298 v3 = to_f32 v2
299 v4 = mul_f32 v3 3B808081 (0.0039215689)
300 v5 = load8 arg(1)
301 v6 = to_f32 v5
302 v7 = mul_f32 v6 3B808081 (0.0039215689)
303 v8 = splat 3F800000 (1)
304 v9 = sub_f32 v8 v4
305 v10 = mad_f32 v7 v9 v4
306 v11 = mul_f32 v10 437F0000 (255)
307 v12 = round v11
308 store8 arg(1) v12
309
Mike Kleina6434a52020-01-08 14:06:52 -06003104 registers, 14 instructions:
3110 r0 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600312loop:
Mike Kleina6434a52020-01-08 14:06:52 -06003131 r1 = load32 arg(0)
3142 r1 = shr_i32 r1 24
3153 r1 = bit_and r1 FF
3164 r1 = to_f32 r1
3175 r1 = mul_f32 r1 3B808081 (0.0039215689)
3186 r2 = load8 arg(1)
3197 r2 = to_f32 r2
3208 r2 = mul_f32 r2 3B808081 (0.0039215689)
3219 r3 = sub_f32 r0 r1
32210 r1 = mad_f32 r2 r3 r1
32311 r1 = mul_f32 r1 437F0000 (255)
32412 r1 = round r1
32513 store8 arg(1) r1
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600326
327RGBA_8888 over G8
Mike Kleina6434a52020-01-08 14:06:52 -060032832 values:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600329 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600330 v1 = bit_and v0 FF
331 v2 = to_f32 v1
332 v3 = mul_f32 v2 3B808081 (0.0039215689)
333 v4 = load8 arg(1)
334 v5 = to_f32 v4
335 v6 = mul_f32 v5 3B808081 (0.0039215689)
336 v7 = shr_i32 v0 24
337 v8 = bit_and v7 FF
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600338 v9 = to_f32 v8
339 v10 = mul_f32 v9 3B808081 (0.0039215689)
340 v11 = splat 3F800000 (1)
341 v12 = sub_f32 v11 v10
Mike Kleina6434a52020-01-08 14:06:52 -0600342 v13 = mad_f32 v6 v12 v3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600343 v14 = splat 3E59B3D0 (0.21259999)
Mike Kleina6434a52020-01-08 14:06:52 -0600344 v15 = shr_i32 v0 8
345 v16 = bit_and v15 FF
346 v17 = to_f32 v16
347 v18 = mul_f32 v17 3B808081 (0.0039215689)
348 v19 = mad_f32 v6 v12 v18
349 v20 = splat 3F371759 (0.71520001)
350 v21 = shr_i32 v0 16
351 v22 = bit_and v21 FF
352 v23 = to_f32 v22
353 v24 = mul_f32 v23 3B808081 (0.0039215689)
354 v25 = mad_f32 v6 v12 v24
355 v26 = mul_f32 v25 3D93DD98 (0.0722)
356 v27 = mad_f32 v19 v20 v26
357 v28 = mad_f32 v13 v14 v27
358 v29 = mul_f32 v28 437F0000 (255)
359 v30 = round v29
360 store8 arg(1) v30
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600361
Mike Kleina6434a52020-01-08 14:06:52 -06003628 registers, 32 instructions:
3630 r0 = splat 3F800000 (1)
3641 r1 = splat 3E59B3D0 (0.21259999)
3652 r2 = splat 3F371759 (0.71520001)
Mike Kleina6307322019-06-07 15:44:26 -0500366loop:
Mike Kleina6434a52020-01-08 14:06:52 -06003673 r3 = load32 arg(0)
3684 r4 = bit_and r3 FF
3695 r4 = to_f32 r4
3706 r4 = mul_f32 r4 3B808081 (0.0039215689)
3717 r5 = load8 arg(1)
3728 r5 = to_f32 r5
3739 r5 = mul_f32 r5 3B808081 (0.0039215689)
37410 r6 = shr_i32 r3 24
37511 r6 = bit_and r6 FF
37612 r6 = to_f32 r6
37713 r6 = mul_f32 r6 3B808081 (0.0039215689)
37814 r6 = sub_f32 r0 r6
37915 r4 = mad_f32 r5 r6 r4
38016 r7 = shr_i32 r3 8
38117 r7 = bit_and r7 FF
38218 r7 = to_f32 r7
38319 r7 = mul_f32 r7 3B808081 (0.0039215689)
38420 r7 = mad_f32 r5 r6 r7
38521 r3 = shr_i32 r3 16
38622 r3 = bit_and r3 FF
38723 r3 = to_f32 r3
38824 r3 = mul_f32 r3 3B808081 (0.0039215689)
38925 r3 = mad_f32 r5 r6 r3
39026 r3 = mul_f32 r3 3D93DD98 (0.0722)
39127 r3 = mad_f32 r7 r2 r3
39228 r3 = mad_f32 r4 r1 r3
39329 r3 = mul_f32 r3 437F0000 (255)
39430 r3 = round r3
39531 store8 arg(1) r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600396
397RGBA_8888 over RGBA_8888
Mike Kleina6434a52020-01-08 14:06:52 -060039850 values:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600399 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600400 v1 = bit_and v0 FF
401 v2 = to_f32 v1
402 v3 = mul_f32 v2 3B808081 (0.0039215689)
403 v4 = load32 arg(1)
404 v5 = bit_and v4 FF
405 v6 = to_f32 v5
406 v7 = mul_f32 v6 3B808081 (0.0039215689)
407 v8 = shr_i32 v0 24
408 v9 = bit_and v8 FF
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600409 v10 = to_f32 v9
410 v11 = mul_f32 v10 3B808081 (0.0039215689)
411 v12 = splat 3F800000 (1)
412 v13 = sub_f32 v12 v11
Mike Kleina6434a52020-01-08 14:06:52 -0600413 v14 = mad_f32 v7 v13 v3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600414 v15 = mul_f32 v14 437F0000 (255)
415 v16 = round v15
Mike Kleina6434a52020-01-08 14:06:52 -0600416 v17 = shr_i32 v0 8
417 v18 = bit_and v17 FF
418 v19 = to_f32 v18
419 v20 = mul_f32 v19 3B808081 (0.0039215689)
420 v21 = shr_i32 v4 8
421 v22 = bit_and v21 FF
422 v23 = to_f32 v22
423 v24 = mul_f32 v23 3B808081 (0.0039215689)
424 v25 = mad_f32 v24 v13 v20
425 v26 = mul_f32 v25 437F0000 (255)
426 v27 = round v26
427 v28 = pack v16 v27 8
428 v29 = shr_i32 v0 16
429 v30 = bit_and v29 FF
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600430 v31 = to_f32 v30
431 v32 = mul_f32 v31 3B808081 (0.0039215689)
Mike Kleina6434a52020-01-08 14:06:52 -0600432 v33 = shr_i32 v4 16
433 v34 = bit_and v33 FF
434 v35 = to_f32 v34
435 v36 = mul_f32 v35 3B808081 (0.0039215689)
436 v37 = mad_f32 v36 v13 v32
437 v38 = mul_f32 v37 437F0000 (255)
438 v39 = round v38
439 v40 = shr_i32 v4 24
440 v41 = bit_and v40 FF
441 v42 = to_f32 v41
442 v43 = mul_f32 v42 3B808081 (0.0039215689)
443 v44 = mad_f32 v43 v13 v11
444 v45 = mul_f32 v44 437F0000 (255)
445 v46 = round v45
446 v47 = pack v39 v46 8
447 v48 = pack v28 v47 16
448 store32 arg(1) v48
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600449
Mike Kleina6434a52020-01-08 14:06:52 -06004508 registers, 50 instructions:
4510 r0 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600452loop:
Mike Kleina6434a52020-01-08 14:06:52 -06004531 r1 = load32 arg(0)
4542 r2 = bit_and r1 FF
4553 r2 = to_f32 r2
4564 r2 = mul_f32 r2 3B808081 (0.0039215689)
4575 r3 = load32 arg(1)
4586 r4 = bit_and r3 FF
4597 r4 = to_f32 r4
4608 r4 = mul_f32 r4 3B808081 (0.0039215689)
4619 r5 = shr_i32 r1 24
46210 r5 = bit_and r5 FF
46311 r5 = to_f32 r5
46412 r5 = mul_f32 r5 3B808081 (0.0039215689)
46513 r6 = sub_f32 r0 r5
46614 r2 = mad_f32 r4 r6 r2
46715 r2 = mul_f32 r2 437F0000 (255)
46816 r2 = round r2
46917 r4 = shr_i32 r1 8
47018 r4 = bit_and r4 FF
47119 r4 = to_f32 r4
47220 r4 = mul_f32 r4 3B808081 (0.0039215689)
47321 r7 = shr_i32 r3 8
47422 r7 = bit_and r7 FF
47523 r7 = to_f32 r7
47624 r7 = mul_f32 r7 3B808081 (0.0039215689)
47725 r4 = mad_f32 r7 r6 r4
47826 r4 = mul_f32 r4 437F0000 (255)
47927 r4 = round r4
48028 r4 = pack r2 r4 8
48129 r1 = shr_i32 r1 16
48230 r1 = bit_and r1 FF
48331 r1 = to_f32 r1
48432 r1 = mul_f32 r1 3B808081 (0.0039215689)
48533 r2 = shr_i32 r3 16
48634 r2 = bit_and r2 FF
48735 r2 = to_f32 r2
48836 r2 = mul_f32 r2 3B808081 (0.0039215689)
48937 r1 = mad_f32 r2 r6 r1
49038 r1 = mul_f32 r1 437F0000 (255)
49139 r1 = round r1
49240 r3 = shr_i32 r3 24
49341 r3 = bit_and r3 FF
49442 r3 = to_f32 r3
49543 r3 = mul_f32 r3 3B808081 (0.0039215689)
49644 r5 = mad_f32 r3 r6 r5
49745 r5 = mul_f32 r5 437F0000 (255)
49846 r5 = round r5
49947 r5 = pack r1 r5 8
50048 r5 = pack r4 r5 16
50149 store32 arg(1) r5
Mike Klein267f5072019-06-03 16:27:46 -0500502
Mike Klein397fc882019-06-20 11:37:10 -0500503I32 (Naive) 8888 over 8888
Mike Kleina6434a52020-01-08 14:06:52 -060050434 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500505 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600506 v1 = bit_and v0 FF
507 v2 = load32 arg(1)
508 v3 = bit_and v2 FF
509 v4 = shr_i32 v0 24
510 v5 = bit_and v4 FF
Mike Kleind48488b2019-10-22 12:27:58 -0500511 v6 = splat 100 (3.5873241e-43)
512 v7 = sub_i32 v6 v5
Mike Kleina6434a52020-01-08 14:06:52 -0600513 v8 = mul_i32 v3 v7
Mike Kleind48488b2019-10-22 12:27:58 -0500514 v9 = shr_i32 v8 8
Mike Kleina6434a52020-01-08 14:06:52 -0600515 v10 = add_i32 v1 v9
516 v11 = shr_i32 v0 8
517 v12 = bit_and v11 FF
518 v13 = shr_i32 v2 8
519 v14 = bit_and v13 FF
520 v15 = mul_i32 v14 v7
521 v16 = shr_i32 v15 8
522 v17 = add_i32 v12 v16
523 v18 = pack v10 v17 8
524 v19 = shr_i32 v0 16
525 v20 = bit_and v19 FF
526 v21 = shr_i32 v2 16
527 v22 = bit_and v21 FF
Mike Kleind48488b2019-10-22 12:27:58 -0500528 v23 = mul_i32 v22 v7
529 v24 = shr_i32 v23 8
Mike Kleina6434a52020-01-08 14:06:52 -0600530 v25 = add_i32 v20 v24
531 v26 = shr_i32 v2 24
532 v27 = bit_and v26 FF
533 v28 = mul_i32 v27 v7
534 v29 = shr_i32 v28 8
535 v30 = add_i32 v5 v29
536 v31 = pack v25 v30 8
537 v32 = pack v18 v31 16
538 store32 arg(1) v32
Mike Kleinaab45b52019-07-02 15:39:23 -0500539
Mike Kleina6434a52020-01-08 14:06:52 -06005408 registers, 34 instructions:
5410 r0 = splat 100 (3.5873241e-43)
Mike Klein397fc882019-06-20 11:37:10 -0500542loop:
Mike Kleina6434a52020-01-08 14:06:52 -06005431 r1 = load32 arg(0)
5442 r2 = bit_and r1 FF
5453 r3 = load32 arg(1)
5464 r4 = bit_and r3 FF
5475 r5 = shr_i32 r1 24
5486 r5 = bit_and r5 FF
5497 r6 = sub_i32 r0 r5
5508 r4 = mul_i32 r4 r6
5519 r4 = shr_i32 r4 8
55210 r4 = add_i32 r2 r4
55311 r2 = shr_i32 r1 8
55412 r2 = bit_and r2 FF
55513 r7 = shr_i32 r3 8
55614 r7 = bit_and r7 FF
55715 r7 = mul_i32 r7 r6
55816 r7 = shr_i32 r7 8
55917 r7 = add_i32 r2 r7
56018 r7 = pack r4 r7 8
56119 r1 = shr_i32 r1 16
56220 r1 = bit_and r1 FF
56321 r4 = shr_i32 r3 16
56422 r4 = bit_and r4 FF
56523 r4 = mul_i32 r4 r6
56624 r4 = shr_i32 r4 8
56725 r4 = add_i32 r1 r4
56826 r3 = shr_i32 r3 24
56927 r3 = bit_and r3 FF
57028 r6 = mul_i32 r3 r6
57129 r6 = shr_i32 r6 8
57230 r6 = add_i32 r5 r6
57331 r6 = pack r4 r6 8
57432 r6 = pack r7 r6 16
57533 store32 arg(1) r6
Mike Klein397fc882019-06-20 11:37:10 -0500576
Mike Klein7b7077c2019-06-03 17:10:59 -0500577I32 8888 over 8888
Mike Kleina6434a52020-01-08 14:06:52 -060057828 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500579 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600580 v1 = bit_and v0 FF
581 v2 = load32 arg(1)
582 v3 = bit_and v2 FF
583 v4 = shr_i32 v0 24
584 v5 = splat 100 (3.5873241e-43)
585 v6 = sub_i32 v5 v4
586 v7 = mul_i16x2 v3 v6
587 v8 = shr_i32 v7 8
588 v9 = add_i32 v1 v8
589 v10 = bytes v0 2
590 v11 = bytes v2 2
591 v12 = mul_i16x2 v11 v6
592 v13 = shr_i32 v12 8
593 v14 = add_i32 v10 v13
594 v15 = pack v9 v14 8
595 v16 = bytes v0 3
596 v17 = bytes v2 3
597 v18 = mul_i16x2 v17 v6
598 v19 = shr_i32 v18 8
599 v20 = add_i32 v16 v19
600 v21 = shr_i32 v2 24
601 v22 = mul_i16x2 v21 v6
602 v23 = shr_i32 v22 8
603 v24 = add_i32 v4 v23
604 v25 = pack v20 v24 8
605 v26 = pack v15 v25 16
606 store32 arg(1) v26
Mike Kleinaab45b52019-07-02 15:39:23 -0500607
Mike Kleina6434a52020-01-08 14:06:52 -06006088 registers, 28 instructions:
6090 r0 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500610loop:
Mike Kleina6434a52020-01-08 14:06:52 -06006111 r1 = load32 arg(0)
6122 r2 = bit_and r1 FF
6133 r3 = load32 arg(1)
6144 r4 = bit_and r3 FF
6155 r5 = shr_i32 r1 24
6166 r6 = sub_i32 r0 r5
6177 r4 = mul_i16x2 r4 r6
6188 r4 = shr_i32 r4 8
6199 r4 = add_i32 r2 r4
62010 r2 = bytes r1 2
62111 r7 = bytes r3 2
62212 r7 = mul_i16x2 r7 r6
62313 r7 = shr_i32 r7 8
62414 r7 = add_i32 r2 r7
62515 r7 = pack r4 r7 8
62616 r1 = bytes r1 3
62717 r4 = bytes r3 3
62818 r4 = mul_i16x2 r4 r6
62919 r4 = shr_i32 r4 8
63020 r4 = add_i32 r1 r4
63121 r3 = shr_i32 r3 24
63222 r6 = mul_i16x2 r3 r6
63323 r6 = shr_i32 r6 8
63424 r6 = add_i32 r5 r6
63525 r6 = pack r4 r6 8
63626 r6 = pack r7 r6 16
63727 store32 arg(1) r6
Mike Klein821f5e82019-06-13 10:56:51 -0500638
639I32 (SWAR) 8888 over 8888
Mike Kleina6434a52020-01-08 14:06:52 -060064014 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500641 v0 = load32 arg(0)
642 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500643 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500644 v3 = sub_i16x2 v2 v1
645 v4 = load32 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -0600646 v5 = bit_and v4 FF00FF
647 v6 = mul_i16x2 v5 v3
648 v7 = shr_i16x2 v6 8
649 v8 = shr_i16x2 v4 8
650 v9 = mul_i16x2 v8 v3
651 v10 = bit_and v9 FF00FF00
652 v11 = bit_or v7 v10
653 v12 = add_i32 v0 v11
654 store32 arg(1) v12
Mike Kleinaab45b52019-07-02 15:39:23 -0500655
Mike Kleina6434a52020-01-08 14:06:52 -06006565 registers, 14 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006570 r0 = splat 1000100 (2.3510604e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500658loop:
Mike Kleina6434a52020-01-08 14:06:52 -06006591 r1 = load32 arg(0)
6602 r2 = bytes r1 404
6613 r2 = sub_i16x2 r0 r2
6624 r3 = load32 arg(1)
6635 r4 = bit_and r3 FF00FF
6646 r4 = mul_i16x2 r4 r2
6657 r4 = shr_i16x2 r4 8
6668 r3 = shr_i16x2 r3 8
6679 r2 = mul_i16x2 r3 r2
66810 r2 = bit_and r2 FF00FF00
66911 r2 = bit_or r4 r2
67012 r2 = add_i32 r1 r2
67113 store32 arg(1) r2
Mike Klein7b7077c2019-06-03 17:10:59 -0500672
Mike Kleinf9963112019-08-08 15:13:25 -04006736 values:
Mike Klein0f61c122019-10-16 10:46:01 -0500674 v0 = splat 1 (1.4012985e-45)
675 v1 = splat 2 (2.8025969e-45)
Mike Kleinf9963112019-08-08 15:13:25 -0400676 v2 = add_i32 v0 v1
677 v3 = load32 arg(0)
678 v4 = mul_i32 v3 v2
679 store32 arg(0) v4
680
6812 registers, 6 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006820 r0 = splat 1 (1.4012985e-45)
6831 r1 = splat 2 (2.8025969e-45)
6842 r1 = add_i32 r0 r1
Mike Kleinf9963112019-08-08 15:13:25 -0400685loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06006863 r0 = load32 arg(0)
6874 r0 = mul_i32 r0 r1
6885 store32 arg(0) r0
Mike Kleinf9963112019-08-08 15:13:25 -0400689
Mike Kleina6434a52020-01-08 14:06:52 -060069024 values:
691 v0 = load32 arg(0)
692 v1 = bit_and v0 FF
693 v2 = load32 arg(1)
694 v3 = bit_and v2 FF
695 v4 = add_i32 v1 v3
696 v5 = shr_i32 v0 8
697 v6 = bit_and v5 FF
698 v7 = shr_i32 v2 8
699 v8 = bit_and v7 FF
700 v9 = add_i32 v6 v8
701 v10 = pack v4 v9 8
702 v11 = shr_i32 v0 16
703 v12 = bit_and v11 FF
704 v13 = shr_i32 v2 16
705 v14 = bit_and v13 FF
706 v15 = add_i32 v12 v14
707 v16 = shr_i32 v0 24
708 v17 = bit_and v16 FF
709 v18 = shr_i32 v2 24
710 v19 = bit_and v18 FF
711 v20 = add_i32 v17 v19
712 v21 = pack v15 v20 8
713 v22 = pack v10 v21 16
714 store32 arg(1) v22
Mike Kleind48488b2019-10-22 12:27:58 -0500715
Mike Kleina6434a52020-01-08 14:06:52 -06007165 registers, 24 instructions:
Mike Kleind48488b2019-10-22 12:27:58 -0500717loop:
Mike Kleina6434a52020-01-08 14:06:52 -06007180 r0 = load32 arg(0)
7191 r1 = bit_and r0 FF
7202 r2 = load32 arg(1)
7213 r3 = bit_and r2 FF
7224 r3 = add_i32 r1 r3
7235 r1 = shr_i32 r0 8
7246 r1 = bit_and r1 FF
7257 r4 = shr_i32 r2 8
7268 r4 = bit_and r4 FF
7279 r4 = add_i32 r1 r4
72810 r4 = pack r3 r4 8
72911 r3 = shr_i32 r0 16
73012 r3 = bit_and r3 FF
73113 r1 = shr_i32 r2 16
73214 r1 = bit_and r1 FF
73315 r1 = add_i32 r3 r1
73416 r0 = shr_i32 r0 24
73517 r0 = bit_and r0 FF
73618 r2 = shr_i32 r2 24
73719 r2 = bit_and r2 FF
73820 r2 = add_i32 r0 r2
73921 r2 = pack r1 r2 8
74022 r2 = pack r4 r2 16
74123 store32 arg(1) r2
Mike Kleind48488b2019-10-22 12:27:58 -0500742