blob: 6a7946b6226a73bcd715445e15fa4b8028d987ce [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600212 values:
3 v0 = load8 arg(0)
4 v1 = to_f32 v0
5 v2 = mul_f32 v1 3B808081 (0.0039215689)
6 v3 = load8 arg(1)
7 v4 = to_f32 v3
8 v5 = mul_f32 v4 3B808081 (0.0039215689)
9 v6 = splat 3F800000 (1)
10 v7 = sub_f32 v6 v2
11 v8 = mad_f32 v5 v7 v2
12 v9 = mul_f32 v8 437F0000 (255)
13 v10 = round v9
14 store8 arg(1) v10
Mike Klein22ea7e92019-06-10 12:05:48 -050015
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600164 registers, 12 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -0600170 r0 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -050018loop:
Mike Kleinb5c43552020-01-07 11:39:30 -0600191 r1 = load8 arg(0)
202 r1 = to_f32 r1
213 r1 = mul_f32 r1 3B808081 (0.0039215689)
224 r2 = load8 arg(1)
235 r2 = to_f32 r2
246 r2 = mul_f32 r2 3B808081 (0.0039215689)
257 r3 = sub_f32 r0 r1
268 r1 = mad_f32 r2 r3 r1
279 r1 = mul_f32 r1 437F0000 (255)
2810 r1 = round r1
2911 store8 arg(1) r1
Mike Klein8c1e0ef2019-11-12 09:07:23 -060030
31A8 over G8
3217 values:
33 v0 = load8 arg(1)
34 v1 = to_f32 v0
35 v2 = mul_f32 v1 3B808081 (0.0039215689)
36 v3 = load8 arg(0)
37 v4 = to_f32 v3
38 v5 = mul_f32 v4 3B808081 (0.0039215689)
39 v6 = splat 3F800000 (1)
40 v7 = sub_f32 v6 v5
41 v8 = mul_f32 v2 v7
42 v9 = splat 3E59B3D0 (0.21259999)
43 v10 = splat 3F371759 (0.71520001)
44 v11 = mul_f32 v8 3D93DD98 (0.0722)
45 v12 = mad_f32 v8 v10 v11
46 v13 = mad_f32 v8 v9 v12
47 v14 = mul_f32 v13 437F0000 (255)
48 v15 = round v14
49 store8 arg(1) v15
50
515 registers, 17 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -0600520 r0 = splat 3F800000 (1)
531 r1 = splat 3E59B3D0 (0.21259999)
542 r2 = splat 3F371759 (0.71520001)
Mike Klein8c1e0ef2019-11-12 09:07:23 -060055loop:
Mike Kleinb5c43552020-01-07 11:39:30 -0600563 r3 = load8 arg(1)
574 r3 = to_f32 r3
585 r3 = mul_f32 r3 3B808081 (0.0039215689)
596 r4 = load8 arg(0)
607 r4 = to_f32 r4
618 r4 = mul_f32 r4 3B808081 (0.0039215689)
629 r4 = sub_f32 r0 r4
6310 r4 = mul_f32 r3 r4
6411 r3 = mul_f32 r4 3D93DD98 (0.0722)
6512 r3 = mad_f32 r4 r2 r3
6613 r3 = mad_f32 r4 r1 r3
6714 r3 = mul_f32 r3 437F0000 (255)
6815 r3 = round r3
6916 store8 arg(1) r3
Mike Klein267f5072019-06-03 16:27:46 -050070
Mike Klein754bad32019-06-05 10:47:46 -050071A8 over RGBA_8888
Mike Klein8c1e0ef2019-11-12 09:07:23 -06007235 values:
73 v0 = load32 arg(1)
74 v1 = splat FF (3.5733111e-43)
75 v2 = extract v0 0 v1
76 v3 = to_f32 v2
77 v4 = mul_f32 v3 3B808081 (0.0039215689)
78 v5 = load8 arg(0)
79 v6 = to_f32 v5
80 v7 = mul_f32 v6 3B808081 (0.0039215689)
81 v8 = splat 3F800000 (1)
82 v9 = sub_f32 v8 v7
83 v10 = mul_f32 v4 v9
84 v11 = mul_f32 v10 437F0000 (255)
85 v12 = round v11
86 v13 = extract v0 8 v1
87 v14 = to_f32 v13
88 v15 = mul_f32 v14 3B808081 (0.0039215689)
89 v16 = mul_f32 v15 v9
90 v17 = mul_f32 v16 437F0000 (255)
91 v18 = round v17
92 v19 = pack v12 v18 8
93 v20 = extract v0 16 v1
94 v21 = to_f32 v20
95 v22 = mul_f32 v21 3B808081 (0.0039215689)
96 v23 = mul_f32 v22 v9
97 v24 = mul_f32 v23 437F0000 (255)
98 v25 = round v24
99 v26 = extract v0 24 v1
100 v27 = to_f32 v26
101 v28 = mul_f32 v27 3B808081 (0.0039215689)
102 v29 = mad_f32 v28 v9 v7
103 v30 = mul_f32 v29 437F0000 (255)
104 v31 = round v30
105 v32 = pack v25 v31 8
106 v33 = pack v19 v32 16
107 store32 arg(1) v33
Mike Klein22ea7e92019-06-10 12:05:48 -0500108
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001097 registers, 35 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001100 r0 = splat FF (3.5733111e-43)
1111 r1 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -0500112loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001132 r2 = load32 arg(1)
1143 r3 = extract r2 0 r0
1154 r3 = to_f32 r3
1165 r3 = mul_f32 r3 3B808081 (0.0039215689)
1176 r4 = load8 arg(0)
1187 r4 = to_f32 r4
1198 r4 = mul_f32 r4 3B808081 (0.0039215689)
1209 r5 = sub_f32 r1 r4
12110 r3 = mul_f32 r3 r5
12211 r3 = mul_f32 r3 437F0000 (255)
12312 r3 = round r3
12413 r6 = extract r2 8 r0
12514 r6 = to_f32 r6
12615 r6 = mul_f32 r6 3B808081 (0.0039215689)
12716 r6 = mul_f32 r6 r5
12817 r6 = mul_f32 r6 437F0000 (255)
12918 r6 = round r6
13019 r6 = pack r3 r6 8
13120 r3 = extract r2 16 r0
13221 r3 = to_f32 r3
13322 r3 = mul_f32 r3 3B808081 (0.0039215689)
13423 r3 = mul_f32 r3 r5
13524 r3 = mul_f32 r3 437F0000 (255)
13625 r3 = round r3
13726 r2 = extract r2 24 r0
13827 r2 = to_f32 r2
13928 r2 = mul_f32 r2 3B808081 (0.0039215689)
14029 r4 = mad_f32 r2 r5 r4
14130 r4 = mul_f32 r4 437F0000 (255)
14231 r4 = round r4
14332 r4 = pack r3 r4 8
14433 r4 = pack r6 r4 16
14534 store32 arg(1) r4
Mike Klein754bad32019-06-05 10:47:46 -0500146
147G8 over A8
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001489 values:
Mike Kleind48488b2019-10-22 12:27:58 -0500149 v0 = splat 3F800000 (1)
Mike Klein1cb05992019-11-21 11:46:58 -0600150 v1 = splat 0 (0)
151 v2 = load8 arg(1)
152 v3 = to_f32 v2
153 v4 = mul_f32 v3 3B808081 (0.0039215689)
154 v5 = mad_f32 v4 v1 v0
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600155 v6 = mul_f32 v5 437F0000 (255)
156 v7 = round v6
157 store8 arg(1) v7
Mike Klein22ea7e92019-06-10 12:05:48 -0500158
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001593 registers, 9 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001600 r0 = splat 3F800000 (1)
1611 r1 = splat 0 (0)
Mike Klein754bad32019-06-05 10:47:46 -0500162loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001632 r2 = load8 arg(1)
1643 r2 = to_f32 r2
1654 r2 = mul_f32 r2 3B808081 (0.0039215689)
1665 r2 = mad_f32 r2 r1 r0
1676 r2 = mul_f32 r2 437F0000 (255)
1687 r2 = round r2
1698 store8 arg(1) r2
Mike Klein754bad32019-06-05 10:47:46 -0500170
171G8 over G8
Mike Klein1cb05992019-11-21 11:46:58 -060017216 values:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600173 v0 = load8 arg(0)
174 v1 = to_f32 v0
175 v2 = mul_f32 v1 3B808081 (0.0039215689)
176 v3 = load8 arg(1)
Mike Kleind48488b2019-10-22 12:27:58 -0500177 v4 = to_f32 v3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600178 v5 = mul_f32 v4 3B808081 (0.0039215689)
Mike Klein1cb05992019-11-21 11:46:58 -0600179 v6 = splat 0 (0)
180 v7 = mad_f32 v5 v6 v2
181 v8 = splat 3E59B3D0 (0.21259999)
182 v9 = splat 3F371759 (0.71520001)
183 v10 = mul_f32 v7 3D93DD98 (0.0722)
184 v11 = mad_f32 v7 v9 v10
185 v12 = mad_f32 v7 v8 v11
186 v13 = mul_f32 v12 437F0000 (255)
187 v14 = round v13
188 store8 arg(1) v14
Mike Klein22ea7e92019-06-10 12:05:48 -0500189
Mike Klein1cb05992019-11-21 11:46:58 -06001905 registers, 16 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001910 r0 = splat 0 (0)
1921 r1 = splat 3E59B3D0 (0.21259999)
1932 r2 = splat 3F371759 (0.71520001)
Mike Klein754bad32019-06-05 10:47:46 -0500194loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001953 r3 = load8 arg(0)
1964 r3 = to_f32 r3
1975 r3 = mul_f32 r3 3B808081 (0.0039215689)
1986 r4 = load8 arg(1)
1997 r4 = to_f32 r4
2008 r4 = mul_f32 r4 3B808081 (0.0039215689)
2019 r3 = mad_f32 r4 r0 r3
20210 r4 = mul_f32 r3 3D93DD98 (0.0722)
20311 r4 = mad_f32 r3 r2 r4
20412 r4 = mad_f32 r3 r1 r4
20513 r4 = mul_f32 r4 437F0000 (255)
20614 r4 = round r4
20715 store8 arg(1) r4
Mike Klein754bad32019-06-05 10:47:46 -0500208
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600209G8 over RGBA_8888
21035 values:
211 v0 = load8 arg(0)
212 v1 = to_f32 v0
213 v2 = mul_f32 v1 3B808081 (0.0039215689)
214 v3 = load32 arg(1)
215 v4 = splat FF (3.5733111e-43)
216 v5 = extract v3 0 v4
217 v6 = to_f32 v5
218 v7 = mul_f32 v6 3B808081 (0.0039215689)
Mike Klein1cb05992019-11-21 11:46:58 -0600219 v8 = splat 0 (0)
220 v9 = mad_f32 v7 v8 v2
221 v10 = mul_f32 v9 437F0000 (255)
222 v11 = round v10
223 v12 = extract v3 8 v4
224 v13 = to_f32 v12
225 v14 = mul_f32 v13 3B808081 (0.0039215689)
226 v15 = mad_f32 v14 v8 v2
227 v16 = mul_f32 v15 437F0000 (255)
228 v17 = round v16
229 v18 = pack v11 v17 8
230 v19 = extract v3 16 v4
231 v20 = to_f32 v19
232 v21 = mul_f32 v20 3B808081 (0.0039215689)
233 v22 = mad_f32 v21 v8 v2
234 v23 = mul_f32 v22 437F0000 (255)
235 v24 = round v23
236 v25 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600237 v26 = extract v3 24 v4
238 v27 = to_f32 v26
239 v28 = mul_f32 v27 3B808081 (0.0039215689)
Mike Klein1cb05992019-11-21 11:46:58 -0600240 v29 = mad_f32 v28 v8 v25
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600241 v30 = mul_f32 v29 437F0000 (255)
242 v31 = round v30
Mike Klein1cb05992019-11-21 11:46:58 -0600243 v32 = pack v24 v31 8
244 v33 = pack v18 v32 16
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600245 store32 arg(1) v33
Mike Klein22ea7e92019-06-10 12:05:48 -0500246
Mike Klein8c1e0ef2019-11-12 09:07:23 -06002477 registers, 35 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06002480 r0 = splat FF (3.5733111e-43)
2491 r1 = splat 0 (0)
2502 r2 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600251loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06002523 r3 = load8 arg(0)
2534 r3 = to_f32 r3
2545 r3 = mul_f32 r3 3B808081 (0.0039215689)
2556 r4 = load32 arg(1)
2567 r5 = extract r4 0 r0
2578 r5 = to_f32 r5
2589 r5 = mul_f32 r5 3B808081 (0.0039215689)
25910 r5 = mad_f32 r5 r1 r3
26011 r5 = mul_f32 r5 437F0000 (255)
26112 r5 = round r5
26213 r6 = extract r4 8 r0
26314 r6 = to_f32 r6
26415 r6 = mul_f32 r6 3B808081 (0.0039215689)
26516 r6 = mad_f32 r6 r1 r3
26617 r6 = mul_f32 r6 437F0000 (255)
26718 r6 = round r6
26819 r6 = pack r5 r6 8
26920 r5 = extract r4 16 r0
27021 r5 = to_f32 r5
27122 r5 = mul_f32 r5 3B808081 (0.0039215689)
27223 r3 = mad_f32 r5 r1 r3
27324 r3 = mul_f32 r3 437F0000 (255)
27425 r3 = round r3
27526 r4 = extract r4 24 r0
27627 r4 = to_f32 r4
27728 r4 = mul_f32 r4 3B808081 (0.0039215689)
27829 r4 = mad_f32 r4 r1 r2
27930 r4 = mul_f32 r4 437F0000 (255)
28031 r4 = round r4
28132 r4 = pack r3 r4 8
28233 r4 = pack r6 r4 16
28334 store32 arg(1) r4
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600284
285RGBA_8888 over A8
28614 values:
287 v0 = load32 arg(0)
288 v1 = splat FF (3.5733111e-43)
289 v2 = extract v0 24 v1
290 v3 = to_f32 v2
291 v4 = mul_f32 v3 3B808081 (0.0039215689)
292 v5 = load8 arg(1)
293 v6 = to_f32 v5
294 v7 = mul_f32 v6 3B808081 (0.0039215689)
295 v8 = splat 3F800000 (1)
296 v9 = sub_f32 v8 v4
297 v10 = mad_f32 v7 v9 v4
298 v11 = mul_f32 v10 437F0000 (255)
299 v12 = round v11
300 store8 arg(1) v12
301
3025 registers, 14 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06003030 r0 = splat FF (3.5733111e-43)
3041 r1 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600305loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06003062 r2 = load32 arg(0)
3073 r2 = extract r2 24 r0
3084 r2 = to_f32 r2
3095 r2 = mul_f32 r2 3B808081 (0.0039215689)
3106 r3 = load8 arg(1)
3117 r3 = to_f32 r3
3128 r3 = mul_f32 r3 3B808081 (0.0039215689)
3139 r4 = sub_f32 r1 r2
31410 r2 = mad_f32 r3 r4 r2
31511 r2 = mul_f32 r2 437F0000 (255)
31612 r2 = round r2
31713 store8 arg(1) r2
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600318
319RGBA_8888 over G8
32030 values:
321 v0 = load32 arg(0)
322 v1 = splat FF (3.5733111e-43)
323 v2 = extract v0 0 v1
324 v3 = to_f32 v2
325 v4 = mul_f32 v3 3B808081 (0.0039215689)
326 v5 = load8 arg(1)
327 v6 = to_f32 v5
328 v7 = mul_f32 v6 3B808081 (0.0039215689)
329 v8 = extract v0 24 v1
330 v9 = to_f32 v8
331 v10 = mul_f32 v9 3B808081 (0.0039215689)
332 v11 = splat 3F800000 (1)
333 v12 = sub_f32 v11 v10
334 v13 = mad_f32 v7 v12 v4
335 v14 = splat 3E59B3D0 (0.21259999)
336 v15 = extract v0 8 v1
337 v16 = to_f32 v15
338 v17 = mul_f32 v16 3B808081 (0.0039215689)
339 v18 = mad_f32 v7 v12 v17
340 v19 = splat 3F371759 (0.71520001)
341 v20 = extract v0 16 v1
342 v21 = to_f32 v20
343 v22 = mul_f32 v21 3B808081 (0.0039215689)
344 v23 = mad_f32 v7 v12 v22
345 v24 = mul_f32 v23 3D93DD98 (0.0722)
346 v25 = mad_f32 v18 v19 v24
347 v26 = mad_f32 v13 v14 v25
348 v27 = mul_f32 v26 437F0000 (255)
349 v28 = round v27
350 store8 arg(1) v28
351
3529 registers, 30 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06003530 r0 = splat FF (3.5733111e-43)
3541 r1 = splat 3F800000 (1)
3552 r2 = splat 3E59B3D0 (0.21259999)
3563 r3 = splat 3F371759 (0.71520001)
Mike Kleina6307322019-06-07 15:44:26 -0500357loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06003584 r4 = load32 arg(0)
3595 r5 = extract r4 0 r0
3606 r5 = to_f32 r5
3617 r5 = mul_f32 r5 3B808081 (0.0039215689)
3628 r6 = load8 arg(1)
3639 r6 = to_f32 r6
36410 r6 = mul_f32 r6 3B808081 (0.0039215689)
36511 r7 = extract r4 24 r0
36612 r7 = to_f32 r7
36713 r7 = mul_f32 r7 3B808081 (0.0039215689)
36814 r7 = sub_f32 r1 r7
36915 r5 = mad_f32 r6 r7 r5
37016 r8 = extract r4 8 r0
37117 r8 = to_f32 r8
37218 r8 = mul_f32 r8 3B808081 (0.0039215689)
37319 r8 = mad_f32 r6 r7 r8
37420 r4 = extract r4 16 r0
37521 r4 = to_f32 r4
37622 r4 = mul_f32 r4 3B808081 (0.0039215689)
37723 r4 = mad_f32 r6 r7 r4
37824 r4 = mul_f32 r4 3D93DD98 (0.0722)
37925 r4 = mad_f32 r8 r3 r4
38026 r4 = mad_f32 r5 r2 r4
38127 r4 = mul_f32 r4 437F0000 (255)
38228 r4 = round r4
38329 store8 arg(1) r4
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600384
385RGBA_8888 over RGBA_8888
38645 values:
387 v0 = load32 arg(0)
388 v1 = splat FF (3.5733111e-43)
389 v2 = extract v0 0 v1
390 v3 = to_f32 v2
391 v4 = mul_f32 v3 3B808081 (0.0039215689)
392 v5 = load32 arg(1)
393 v6 = extract v5 0 v1
394 v7 = to_f32 v6
395 v8 = mul_f32 v7 3B808081 (0.0039215689)
396 v9 = extract v0 24 v1
397 v10 = to_f32 v9
398 v11 = mul_f32 v10 3B808081 (0.0039215689)
399 v12 = splat 3F800000 (1)
400 v13 = sub_f32 v12 v11
401 v14 = mad_f32 v8 v13 v4
402 v15 = mul_f32 v14 437F0000 (255)
403 v16 = round v15
404 v17 = extract v0 8 v1
405 v18 = to_f32 v17
406 v19 = mul_f32 v18 3B808081 (0.0039215689)
407 v20 = extract v5 8 v1
408 v21 = to_f32 v20
409 v22 = mul_f32 v21 3B808081 (0.0039215689)
410 v23 = mad_f32 v22 v13 v19
411 v24 = mul_f32 v23 437F0000 (255)
412 v25 = round v24
413 v26 = pack v16 v25 8
414 v27 = extract v0 16 v1
415 v28 = to_f32 v27
416 v29 = mul_f32 v28 3B808081 (0.0039215689)
417 v30 = extract v5 16 v1
418 v31 = to_f32 v30
419 v32 = mul_f32 v31 3B808081 (0.0039215689)
420 v33 = mad_f32 v32 v13 v29
421 v34 = mul_f32 v33 437F0000 (255)
422 v35 = round v34
423 v36 = extract v5 24 v1
424 v37 = to_f32 v36
425 v38 = mul_f32 v37 3B808081 (0.0039215689)
426 v39 = mad_f32 v38 v13 v11
427 v40 = mul_f32 v39 437F0000 (255)
428 v41 = round v40
429 v42 = pack v35 v41 8
430 v43 = pack v26 v42 16
431 store32 arg(1) v43
432
4339 registers, 45 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06004340 r0 = splat FF (3.5733111e-43)
4351 r1 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600436loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06004372 r2 = load32 arg(0)
4383 r3 = extract r2 0 r0
4394 r3 = to_f32 r3
4405 r3 = mul_f32 r3 3B808081 (0.0039215689)
4416 r4 = load32 arg(1)
4427 r5 = extract r4 0 r0
4438 r5 = to_f32 r5
4449 r5 = mul_f32 r5 3B808081 (0.0039215689)
44510 r6 = extract r2 24 r0
44611 r6 = to_f32 r6
44712 r6 = mul_f32 r6 3B808081 (0.0039215689)
44813 r7 = sub_f32 r1 r6
44914 r3 = mad_f32 r5 r7 r3
45015 r3 = mul_f32 r3 437F0000 (255)
45116 r3 = round r3
45217 r5 = extract r2 8 r0
45318 r5 = to_f32 r5
45419 r5 = mul_f32 r5 3B808081 (0.0039215689)
45520 r8 = extract r4 8 r0
45621 r8 = to_f32 r8
45722 r8 = mul_f32 r8 3B808081 (0.0039215689)
45823 r5 = mad_f32 r8 r7 r5
45924 r5 = mul_f32 r5 437F0000 (255)
46025 r5 = round r5
46126 r5 = pack r3 r5 8
46227 r2 = extract r2 16 r0
46328 r2 = to_f32 r2
46429 r2 = mul_f32 r2 3B808081 (0.0039215689)
46530 r3 = extract r4 16 r0
46631 r3 = to_f32 r3
46732 r3 = mul_f32 r3 3B808081 (0.0039215689)
46833 r2 = mad_f32 r3 r7 r2
46934 r2 = mul_f32 r2 437F0000 (255)
47035 r2 = round r2
47136 r4 = extract r4 24 r0
47237 r4 = to_f32 r4
47338 r4 = mul_f32 r4 3B808081 (0.0039215689)
47439 r6 = mad_f32 r4 r7 r6
47540 r6 = mul_f32 r6 437F0000 (255)
47641 r6 = round r6
47742 r6 = pack r2 r6 8
47843 r6 = pack r5 r6 16
47944 store32 arg(1) r6
Mike Klein267f5072019-06-03 16:27:46 -0500480
Mike Klein397fc882019-06-20 11:37:10 -0500481I32 (Naive) 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050048229 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500483 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500484 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500485 v2 = extract v0 0 v1
Mike Kleind48488b2019-10-22 12:27:58 -0500486 v3 = load32 arg(1)
487 v4 = extract v3 0 v1
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500488 v5 = extract v0 24 v1
Mike Kleind48488b2019-10-22 12:27:58 -0500489 v6 = splat 100 (3.5873241e-43)
490 v7 = sub_i32 v6 v5
491 v8 = mul_i32 v4 v7
492 v9 = shr_i32 v8 8
493 v10 = add_i32 v2 v9
494 v11 = extract v0 8 v1
495 v12 = extract v3 8 v1
496 v13 = mul_i32 v12 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500497 v14 = shr_i32 v13 8
Mike Kleind48488b2019-10-22 12:27:58 -0500498 v15 = add_i32 v11 v14
499 v16 = pack v10 v15 8
500 v17 = extract v0 16 v1
501 v18 = extract v3 16 v1
502 v19 = mul_i32 v18 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500503 v20 = shr_i32 v19 8
Mike Kleind48488b2019-10-22 12:27:58 -0500504 v21 = add_i32 v17 v20
505 v22 = extract v3 24 v1
506 v23 = mul_i32 v22 v7
507 v24 = shr_i32 v23 8
508 v25 = add_i32 v5 v24
509 v26 = pack v21 v25 8
510 v27 = pack v16 v26 16
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500511 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500512
Mike Kleind48488b2019-10-22 12:27:58 -05005139 registers, 29 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06005140 r0 = splat FF (3.5733111e-43)
5151 r1 = splat 100 (3.5873241e-43)
Mike Klein397fc882019-06-20 11:37:10 -0500516loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06005172 r2 = load32 arg(0)
5183 r3 = extract r2 0 r0
5194 r4 = load32 arg(1)
5205 r5 = extract r4 0 r0
5216 r6 = extract r2 24 r0
5227 r7 = sub_i32 r1 r6
5238 r5 = mul_i32 r5 r7
5249 r5 = shr_i32 r5 8
52510 r5 = add_i32 r3 r5
52611 r3 = extract r2 8 r0
52712 r8 = extract r4 8 r0
52813 r8 = mul_i32 r8 r7
52914 r8 = shr_i32 r8 8
53015 r8 = add_i32 r3 r8
53116 r8 = pack r5 r8 8
53217 r2 = extract r2 16 r0
53318 r5 = extract r4 16 r0
53419 r5 = mul_i32 r5 r7
53520 r5 = shr_i32 r5 8
53621 r5 = add_i32 r2 r5
53722 r4 = extract r4 24 r0
53823 r7 = mul_i32 r4 r7
53924 r7 = shr_i32 r7 8
54025 r7 = add_i32 r6 r7
54126 r7 = pack r5 r7 8
54227 r7 = pack r8 r7 16
54328 store32 arg(1) r7
Mike Klein397fc882019-06-20 11:37:10 -0500544
Mike Klein7b7077c2019-06-03 17:10:59 -0500545I32 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050054629 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500547 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500548 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500549 v2 = bit_and v0 v1
Mike Kleind48488b2019-10-22 12:27:58 -0500550 v3 = load32 arg(1)
551 v4 = bit_and v3 v1
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500552 v5 = shr_i32 v0 24
Mike Kleind48488b2019-10-22 12:27:58 -0500553 v6 = splat 100 (3.5873241e-43)
554 v7 = sub_i32 v6 v5
555 v8 = mul_i16x2 v4 v7
556 v9 = shr_i32 v8 8
557 v10 = add_i32 v2 v9
558 v11 = bytes v0 2
559 v12 = bytes v3 2
560 v13 = mul_i16x2 v12 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500561 v14 = shr_i32 v13 8
Mike Kleind48488b2019-10-22 12:27:58 -0500562 v15 = add_i32 v11 v14
563 v16 = pack v10 v15 8
564 v17 = bytes v0 3
565 v18 = bytes v3 3
566 v19 = mul_i16x2 v18 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500567 v20 = shr_i32 v19 8
Mike Kleind48488b2019-10-22 12:27:58 -0500568 v21 = add_i32 v17 v20
569 v22 = shr_i32 v3 24
570 v23 = mul_i16x2 v22 v7
571 v24 = shr_i32 v23 8
572 v25 = add_i32 v5 v24
573 v26 = pack v21 v25 8
574 v27 = pack v16 v26 16
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500575 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500576
Mike Kleind48488b2019-10-22 12:27:58 -05005779 registers, 29 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06005780 r0 = splat FF (3.5733111e-43)
5791 r1 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500580loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06005812 r2 = load32 arg(0)
5823 r3 = bit_and r2 r0
5834 r4 = load32 arg(1)
5845 r5 = bit_and r4 r0
5856 r6 = shr_i32 r2 24
5867 r7 = sub_i32 r1 r6
5878 r5 = mul_i16x2 r5 r7
5889 r5 = shr_i32 r5 8
58910 r5 = add_i32 r3 r5
59011 r3 = bytes r2 2
59112 r8 = bytes r4 2
59213 r8 = mul_i16x2 r8 r7
59314 r8 = shr_i32 r8 8
59415 r8 = add_i32 r3 r8
59516 r8 = pack r5 r8 8
59617 r2 = bytes r2 3
59718 r5 = bytes r4 3
59819 r5 = mul_i16x2 r5 r7
59920 r5 = shr_i32 r5 8
60021 r5 = add_i32 r2 r5
60122 r4 = shr_i32 r4 24
60223 r7 = mul_i16x2 r4 r7
60324 r7 = shr_i32 r7 8
60425 r7 = add_i32 r6 r7
60526 r7 = pack r5 r7 8
60627 r7 = pack r8 r7 16
60728 store32 arg(1) r7
Mike Klein821f5e82019-06-13 10:56:51 -0500608
609I32 (SWAR) 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050061015 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500611 v0 = load32 arg(0)
612 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500613 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500614 v3 = sub_i16x2 v2 v1
615 v4 = load32 arg(1)
Mike Klein5e533c92019-07-22 13:44:54 -0500616 v5 = splat FF00FF (2.3418409e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500617 v6 = bit_and v4 v5
Mike Kleind48488b2019-10-22 12:27:58 -0500618 v7 = mul_i16x2 v6 v3
619 v8 = shr_i16x2 v7 8
620 v9 = shr_i16x2 v4 8
621 v10 = mul_i16x2 v9 v3
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500622 v11 = bit_clear v10 v5
Mike Kleind48488b2019-10-22 12:27:58 -0500623 v12 = bit_or v8 v11
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500624 v13 = add_i32 v0 v12
625 store32 arg(1) v13
Mike Kleinaab45b52019-07-02 15:39:23 -0500626
Mike Klein2b7b2a22019-06-23 20:35:28 -04006276 registers, 15 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006280 r0 = splat 1000100 (2.3510604e-38)
6291 r1 = splat FF00FF (2.3418409e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500630loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06006312 r2 = load32 arg(0)
6323 r3 = bytes r2 404
6334 r3 = sub_i16x2 r0 r3
6345 r4 = load32 arg(1)
6356 r5 = bit_and r4 r1
6367 r5 = mul_i16x2 r5 r3
6378 r5 = shr_i16x2 r5 8
6389 r4 = shr_i16x2 r4 8
63910 r3 = mul_i16x2 r4 r3
64011 r3 = bit_clear r3 r1
64112 r3 = bit_or r5 r3
64213 r3 = add_i32 r2 r3
64314 store32 arg(1) r3
Mike Klein7b7077c2019-06-03 17:10:59 -0500644
Mike Kleinf9963112019-08-08 15:13:25 -04006456 values:
Mike Klein0f61c122019-10-16 10:46:01 -0500646 v0 = splat 1 (1.4012985e-45)
647 v1 = splat 2 (2.8025969e-45)
Mike Kleinf9963112019-08-08 15:13:25 -0400648 v2 = add_i32 v0 v1
649 v3 = load32 arg(0)
650 v4 = mul_i32 v3 v2
651 store32 arg(0) v4
652
6532 registers, 6 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006540 r0 = splat 1 (1.4012985e-45)
6551 r1 = splat 2 (2.8025969e-45)
6562 r1 = add_i32 r0 r1
Mike Kleinf9963112019-08-08 15:13:25 -0400657loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06006583 r0 = load32 arg(0)
6594 r0 = mul_i32 r0 r1
6605 store32 arg(0) r0
Mike Kleinf9963112019-08-08 15:13:25 -0400661
Mike Kleind48488b2019-10-22 12:27:58 -050066219 values:
663 v0 = splat FF (3.5733111e-43)
664 v1 = load32 arg(0)
665 v2 = extract v1 0 v0
666 v3 = load32 arg(1)
667 v4 = extract v3 0 v0
668 v5 = add_i32 v2 v4
669 v6 = extract v1 8 v0
670 v7 = extract v3 8 v0
671 v8 = add_i32 v6 v7
672 v9 = pack v5 v8 8
673 v10 = extract v1 16 v0
674 v11 = extract v3 16 v0
675 v12 = add_i32 v10 v11
676 v13 = extract v1 24 v0
677 v14 = extract v3 24 v0
678 v15 = add_i32 v13 v14
679 v16 = pack v12 v15 8
680 v17 = pack v9 v16 16
681 store32 arg(1) v17
682
6836 registers, 19 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006840 r0 = splat FF (3.5733111e-43)
Mike Kleind48488b2019-10-22 12:27:58 -0500685loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06006861 r1 = load32 arg(0)
6872 r2 = extract r1 0 r0
6883 r3 = load32 arg(1)
6894 r4 = extract r3 0 r0
6905 r4 = add_i32 r2 r4
6916 r2 = extract r1 8 r0
6927 r5 = extract r3 8 r0
6938 r5 = add_i32 r2 r5
6949 r5 = pack r4 r5 8
69510 r4 = extract r1 16 r0
69611 r2 = extract r3 16 r0
69712 r2 = add_i32 r4 r2
69813 r1 = extract r1 24 r0
69914 r3 = extract r3 24 r0
70015 r3 = add_i32 r1 r3
70116 r3 = pack r2 r3 8
70217 r3 = pack r5 r3 16
70318 store32 arg(1) r3
Mike Kleind48488b2019-10-22 12:27:58 -0500704