blob: 8d7144412d4d60b468b5492514284a181b4c2ff0 [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Kleined9b1f12020-02-06 13:02:32 -0600212 values (originally 16):
Mike Klein8c1e0ef2019-11-12 09:07:23 -06003 v0 = load8 arg(0)
4 v1 = to_f32 v0
5 v2 = mul_f32 v1 3B808081 (0.0039215689)
6 v3 = load8 arg(1)
7 v4 = to_f32 v3
8 v5 = mul_f32 v4 3B808081 (0.0039215689)
9 v6 = splat 3F800000 (1)
10 v7 = sub_f32 v6 v2
11 v8 = mad_f32 v5 v7 v2
12 v9 = mul_f32 v8 437F0000 (255)
13 v10 = round v9
14 store8 arg(1) v10
Mike Klein22ea7e92019-06-10 12:05:48 -050015
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600164 registers, 12 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -0600170 r0 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -050018loop:
Mike Kleinb5c43552020-01-07 11:39:30 -0600191 r1 = load8 arg(0)
202 r1 = to_f32 r1
213 r1 = mul_f32 r1 3B808081 (0.0039215689)
224 r2 = load8 arg(1)
235 r2 = to_f32 r2
246 r2 = mul_f32 r2 3B808081 (0.0039215689)
257 r3 = sub_f32 r0 r1
268 r1 = mad_f32 r2 r3 r1
279 r1 = mul_f32 r1 437F0000 (255)
2810 r1 = round r1
2911 store8 arg(1) r1
Mike Klein8c1e0ef2019-11-12 09:07:23 -060030
31A8 over G8
Mike Kleined9b1f12020-02-06 13:02:32 -06003217 values (originally 22):
Mike Klein8c1e0ef2019-11-12 09:07:23 -060033 v0 = load8 arg(1)
34 v1 = to_f32 v0
35 v2 = mul_f32 v1 3B808081 (0.0039215689)
36 v3 = load8 arg(0)
37 v4 = to_f32 v3
38 v5 = mul_f32 v4 3B808081 (0.0039215689)
39 v6 = splat 3F800000 (1)
40 v7 = sub_f32 v6 v5
41 v8 = mul_f32 v2 v7
42 v9 = splat 3E59B3D0 (0.21259999)
43 v10 = splat 3F371759 (0.71520001)
44 v11 = mul_f32 v8 3D93DD98 (0.0722)
45 v12 = mad_f32 v8 v10 v11
46 v13 = mad_f32 v8 v9 v12
47 v14 = mul_f32 v13 437F0000 (255)
48 v15 = round v14
49 store8 arg(1) v15
50
515 registers, 17 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -0600520 r0 = splat 3F800000 (1)
531 r1 = splat 3E59B3D0 (0.21259999)
542 r2 = splat 3F371759 (0.71520001)
Mike Klein8c1e0ef2019-11-12 09:07:23 -060055loop:
Mike Kleinb5c43552020-01-07 11:39:30 -0600563 r3 = load8 arg(1)
574 r3 = to_f32 r3
585 r3 = mul_f32 r3 3B808081 (0.0039215689)
596 r4 = load8 arg(0)
607 r4 = to_f32 r4
618 r4 = mul_f32 r4 3B808081 (0.0039215689)
629 r4 = sub_f32 r0 r4
6310 r4 = mul_f32 r3 r4
6411 r3 = mul_f32 r4 3D93DD98 (0.0722)
6512 r3 = mad_f32 r4 r2 r3
6613 r3 = mad_f32 r4 r1 r3
6714 r3 = mul_f32 r3 437F0000 (255)
6815 r3 = round r3
6916 store8 arg(1) r3
Mike Klein267f5072019-06-03 16:27:46 -050070
Mike Klein754bad32019-06-05 10:47:46 -050071A8 over RGBA_8888
Mike Kleined9b1f12020-02-06 13:02:32 -06007236 values (originally 40):
Mike Klein8c1e0ef2019-11-12 09:07:23 -060073 v0 = load32 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -060074 v1 = bit_and v0 FF
75 v2 = to_f32 v1
76 v3 = mul_f32 v2 3B808081 (0.0039215689)
77 v4 = load8 arg(0)
78 v5 = to_f32 v4
79 v6 = mul_f32 v5 3B808081 (0.0039215689)
80 v7 = splat 3F800000 (1)
81 v8 = sub_f32 v7 v6
82 v9 = mul_f32 v3 v8
83 v10 = mul_f32 v9 437F0000 (255)
84 v11 = round v10
85 v12 = shr_i32 v0 8
86 v13 = bit_and v12 FF
Mike Klein8c1e0ef2019-11-12 09:07:23 -060087 v14 = to_f32 v13
88 v15 = mul_f32 v14 3B808081 (0.0039215689)
Mike Kleina6434a52020-01-08 14:06:52 -060089 v16 = mul_f32 v15 v8
Mike Klein8c1e0ef2019-11-12 09:07:23 -060090 v17 = mul_f32 v16 437F0000 (255)
91 v18 = round v17
Mike Kleina6434a52020-01-08 14:06:52 -060092 v19 = pack v11 v18 8
93 v20 = shr_i32 v0 16
94 v21 = bit_and v20 FF
95 v22 = to_f32 v21
96 v23 = mul_f32 v22 3B808081 (0.0039215689)
97 v24 = mul_f32 v23 v8
98 v25 = mul_f32 v24 437F0000 (255)
99 v26 = round v25
100 v27 = shr_i32 v0 24
Mike Klein57bdb242020-01-08 15:25:07 -0600101 v28 = to_f32 v27
102 v29 = mul_f32 v28 3B808081 (0.0039215689)
103 v30 = mad_f32 v29 v8 v6
104 v31 = mul_f32 v30 437F0000 (255)
105 v32 = round v31
106 v33 = pack v26 v32 8
107 v34 = pack v19 v33 16
108 store32 arg(1) v34
Mike Klein22ea7e92019-06-10 12:05:48 -0500109
Mike Klein57bdb242020-01-08 15:25:07 -06001106 registers, 36 instructions:
Mike Kleina6434a52020-01-08 14:06:52 -06001110 r0 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -0500112loop:
Mike Kleina6434a52020-01-08 14:06:52 -06001131 r1 = load32 arg(1)
1142 r2 = bit_and r1 FF
1153 r2 = to_f32 r2
1164 r2 = mul_f32 r2 3B808081 (0.0039215689)
1175 r3 = load8 arg(0)
1186 r3 = to_f32 r3
1197 r3 = mul_f32 r3 3B808081 (0.0039215689)
1208 r4 = sub_f32 r0 r3
1219 r2 = mul_f32 r2 r4
12210 r2 = mul_f32 r2 437F0000 (255)
12311 r2 = round r2
12412 r5 = shr_i32 r1 8
12513 r5 = bit_and r5 FF
12614 r5 = to_f32 r5
12715 r5 = mul_f32 r5 3B808081 (0.0039215689)
12816 r5 = mul_f32 r5 r4
12917 r5 = mul_f32 r5 437F0000 (255)
13018 r5 = round r5
13119 r5 = pack r2 r5 8
13220 r2 = shr_i32 r1 16
13321 r2 = bit_and r2 FF
13422 r2 = to_f32 r2
13523 r2 = mul_f32 r2 3B808081 (0.0039215689)
13624 r2 = mul_f32 r2 r4
13725 r2 = mul_f32 r2 437F0000 (255)
13826 r2 = round r2
13927 r1 = shr_i32 r1 24
Mike Klein57bdb242020-01-08 15:25:07 -060014028 r1 = to_f32 r1
14129 r1 = mul_f32 r1 3B808081 (0.0039215689)
14230 r3 = mad_f32 r1 r4 r3
14331 r3 = mul_f32 r3 437F0000 (255)
14432 r3 = round r3
14533 r3 = pack r2 r3 8
14634 r3 = pack r5 r3 16
14735 store32 arg(1) r3
Mike Klein754bad32019-06-05 10:47:46 -0500148
149G8 over A8
Mike Kleined9b1f12020-02-06 13:02:32 -06001509 values (originally 15):
Mike Kleind48488b2019-10-22 12:27:58 -0500151 v0 = splat 3F800000 (1)
Mike Klein1cb05992019-11-21 11:46:58 -0600152 v1 = splat 0 (0)
153 v2 = load8 arg(1)
154 v3 = to_f32 v2
155 v4 = mul_f32 v3 3B808081 (0.0039215689)
156 v5 = mad_f32 v4 v1 v0
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600157 v6 = mul_f32 v5 437F0000 (255)
158 v7 = round v6
159 store8 arg(1) v7
Mike Klein22ea7e92019-06-10 12:05:48 -0500160
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001613 registers, 9 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001620 r0 = splat 3F800000 (1)
1631 r1 = splat 0 (0)
Mike Klein754bad32019-06-05 10:47:46 -0500164loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001652 r2 = load8 arg(1)
1663 r2 = to_f32 r2
1674 r2 = mul_f32 r2 3B808081 (0.0039215689)
1685 r2 = mad_f32 r2 r1 r0
1696 r2 = mul_f32 r2 437F0000 (255)
1707 r2 = round r2
1718 store8 arg(1) r2
Mike Klein754bad32019-06-05 10:47:46 -0500172
173G8 over G8
Mike Kleined9b1f12020-02-06 13:02:32 -060017416 values (originally 20):
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600175 v0 = load8 arg(0)
176 v1 = to_f32 v0
177 v2 = mul_f32 v1 3B808081 (0.0039215689)
178 v3 = load8 arg(1)
Mike Kleind48488b2019-10-22 12:27:58 -0500179 v4 = to_f32 v3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600180 v5 = mul_f32 v4 3B808081 (0.0039215689)
Mike Klein1cb05992019-11-21 11:46:58 -0600181 v6 = splat 0 (0)
182 v7 = mad_f32 v5 v6 v2
183 v8 = splat 3E59B3D0 (0.21259999)
184 v9 = splat 3F371759 (0.71520001)
185 v10 = mul_f32 v7 3D93DD98 (0.0722)
186 v11 = mad_f32 v7 v9 v10
187 v12 = mad_f32 v7 v8 v11
188 v13 = mul_f32 v12 437F0000 (255)
189 v14 = round v13
190 store8 arg(1) v14
Mike Klein22ea7e92019-06-10 12:05:48 -0500191
Mike Klein1cb05992019-11-21 11:46:58 -06001925 registers, 16 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06001930 r0 = splat 0 (0)
1941 r1 = splat 3E59B3D0 (0.21259999)
1952 r2 = splat 3F371759 (0.71520001)
Mike Klein754bad32019-06-05 10:47:46 -0500196loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06001973 r3 = load8 arg(0)
1984 r3 = to_f32 r3
1995 r3 = mul_f32 r3 3B808081 (0.0039215689)
2006 r4 = load8 arg(1)
2017 r4 = to_f32 r4
2028 r4 = mul_f32 r4 3B808081 (0.0039215689)
2039 r3 = mad_f32 r4 r0 r3
20410 r4 = mul_f32 r3 3D93DD98 (0.0722)
20511 r4 = mad_f32 r3 r2 r4
20612 r4 = mad_f32 r3 r1 r4
20713 r4 = mul_f32 r4 437F0000 (255)
20814 r4 = round r4
20915 store8 arg(1) r4
Mike Klein754bad32019-06-05 10:47:46 -0500210
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600211G8 over RGBA_8888
Mike Kleined9b1f12020-02-06 13:02:32 -060021236 values (originally 39):
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600213 v0 = load8 arg(0)
214 v1 = to_f32 v0
215 v2 = mul_f32 v1 3B808081 (0.0039215689)
216 v3 = load32 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -0600217 v4 = bit_and v3 FF
218 v5 = to_f32 v4
219 v6 = mul_f32 v5 3B808081 (0.0039215689)
220 v7 = splat 0 (0)
221 v8 = mad_f32 v6 v7 v2
222 v9 = mul_f32 v8 437F0000 (255)
223 v10 = round v9
224 v11 = shr_i32 v3 8
225 v12 = bit_and v11 FF
Mike Klein1cb05992019-11-21 11:46:58 -0600226 v13 = to_f32 v12
227 v14 = mul_f32 v13 3B808081 (0.0039215689)
Mike Kleina6434a52020-01-08 14:06:52 -0600228 v15 = mad_f32 v14 v7 v2
Mike Klein1cb05992019-11-21 11:46:58 -0600229 v16 = mul_f32 v15 437F0000 (255)
230 v17 = round v16
Mike Kleina6434a52020-01-08 14:06:52 -0600231 v18 = pack v10 v17 8
232 v19 = shr_i32 v3 16
233 v20 = bit_and v19 FF
234 v21 = to_f32 v20
235 v22 = mul_f32 v21 3B808081 (0.0039215689)
236 v23 = mad_f32 v22 v7 v2
237 v24 = mul_f32 v23 437F0000 (255)
238 v25 = round v24
239 v26 = splat 3F800000 (1)
240 v27 = shr_i32 v3 24
Mike Klein57bdb242020-01-08 15:25:07 -0600241 v28 = to_f32 v27
242 v29 = mul_f32 v28 3B808081 (0.0039215689)
243 v30 = mad_f32 v29 v7 v26
244 v31 = mul_f32 v30 437F0000 (255)
245 v32 = round v31
246 v33 = pack v25 v32 8
247 v34 = pack v18 v33 16
248 store32 arg(1) v34
Mike Klein22ea7e92019-06-10 12:05:48 -0500249
Mike Klein57bdb242020-01-08 15:25:07 -06002506 registers, 36 instructions:
Mike Kleina6434a52020-01-08 14:06:52 -06002510 r0 = splat 0 (0)
2521 r1 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600253loop:
Mike Kleina6434a52020-01-08 14:06:52 -06002542 r2 = load8 arg(0)
2553 r2 = to_f32 r2
2564 r2 = mul_f32 r2 3B808081 (0.0039215689)
2575 r3 = load32 arg(1)
2586 r4 = bit_and r3 FF
2597 r4 = to_f32 r4
2608 r4 = mul_f32 r4 3B808081 (0.0039215689)
2619 r4 = mad_f32 r4 r0 r2
26210 r4 = mul_f32 r4 437F0000 (255)
26311 r4 = round r4
26412 r5 = shr_i32 r3 8
26513 r5 = bit_and r5 FF
26614 r5 = to_f32 r5
26715 r5 = mul_f32 r5 3B808081 (0.0039215689)
26816 r5 = mad_f32 r5 r0 r2
26917 r5 = mul_f32 r5 437F0000 (255)
27018 r5 = round r5
27119 r5 = pack r4 r5 8
27220 r4 = shr_i32 r3 16
27321 r4 = bit_and r4 FF
27422 r4 = to_f32 r4
27523 r4 = mul_f32 r4 3B808081 (0.0039215689)
27624 r2 = mad_f32 r4 r0 r2
27725 r2 = mul_f32 r2 437F0000 (255)
27826 r2 = round r2
27927 r3 = shr_i32 r3 24
Mike Klein57bdb242020-01-08 15:25:07 -060028028 r3 = to_f32 r3
28129 r3 = mul_f32 r3 3B808081 (0.0039215689)
28230 r3 = mad_f32 r3 r0 r1
28331 r3 = mul_f32 r3 437F0000 (255)
28432 r3 = round r3
28533 r3 = pack r2 r3 8
28634 r3 = pack r5 r3 16
28735 store32 arg(1) r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600288
289RGBA_8888 over A8
Mike Kleined9b1f12020-02-06 13:02:32 -060029013 values (originally 31):
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600291 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600292 v1 = shr_i32 v0 24
Mike Klein57bdb242020-01-08 15:25:07 -0600293 v2 = to_f32 v1
294 v3 = mul_f32 v2 3B808081 (0.0039215689)
295 v4 = load8 arg(1)
296 v5 = to_f32 v4
297 v6 = mul_f32 v5 3B808081 (0.0039215689)
298 v7 = splat 3F800000 (1)
299 v8 = sub_f32 v7 v3
300 v9 = mad_f32 v6 v8 v3
301 v10 = mul_f32 v9 437F0000 (255)
302 v11 = round v10
303 store8 arg(1) v11
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600304
Mike Klein57bdb242020-01-08 15:25:07 -06003054 registers, 13 instructions:
Mike Kleina6434a52020-01-08 14:06:52 -06003060 r0 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600307loop:
Mike Kleina6434a52020-01-08 14:06:52 -06003081 r1 = load32 arg(0)
3092 r1 = shr_i32 r1 24
Mike Klein57bdb242020-01-08 15:25:07 -06003103 r1 = to_f32 r1
3114 r1 = mul_f32 r1 3B808081 (0.0039215689)
3125 r2 = load8 arg(1)
3136 r2 = to_f32 r2
3147 r2 = mul_f32 r2 3B808081 (0.0039215689)
3158 r3 = sub_f32 r0 r1
3169 r1 = mad_f32 r2 r3 r1
31710 r1 = mul_f32 r1 437F0000 (255)
31811 r1 = round r1
31912 store8 arg(1) r1
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600320
321RGBA_8888 over G8
Mike Kleined9b1f12020-02-06 13:02:32 -060032231 values (originally 36):
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600323 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600324 v1 = bit_and v0 FF
325 v2 = to_f32 v1
326 v3 = mul_f32 v2 3B808081 (0.0039215689)
327 v4 = load8 arg(1)
328 v5 = to_f32 v4
329 v6 = mul_f32 v5 3B808081 (0.0039215689)
330 v7 = shr_i32 v0 24
Mike Klein57bdb242020-01-08 15:25:07 -0600331 v8 = to_f32 v7
332 v9 = mul_f32 v8 3B808081 (0.0039215689)
333 v10 = splat 3F800000 (1)
334 v11 = sub_f32 v10 v9
335 v12 = mad_f32 v6 v11 v3
336 v13 = splat 3E59B3D0 (0.21259999)
337 v14 = shr_i32 v0 8
338 v15 = bit_and v14 FF
339 v16 = to_f32 v15
340 v17 = mul_f32 v16 3B808081 (0.0039215689)
341 v18 = mad_f32 v6 v11 v17
342 v19 = splat 3F371759 (0.71520001)
343 v20 = shr_i32 v0 16
344 v21 = bit_and v20 FF
345 v22 = to_f32 v21
346 v23 = mul_f32 v22 3B808081 (0.0039215689)
347 v24 = mad_f32 v6 v11 v23
348 v25 = mul_f32 v24 3D93DD98 (0.0722)
349 v26 = mad_f32 v18 v19 v25
350 v27 = mad_f32 v12 v13 v26
351 v28 = mul_f32 v27 437F0000 (255)
352 v29 = round v28
353 store8 arg(1) v29
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600354
Mike Klein57bdb242020-01-08 15:25:07 -06003558 registers, 31 instructions:
Mike Kleina6434a52020-01-08 14:06:52 -06003560 r0 = splat 3F800000 (1)
3571 r1 = splat 3E59B3D0 (0.21259999)
3582 r2 = splat 3F371759 (0.71520001)
Mike Kleina6307322019-06-07 15:44:26 -0500359loop:
Mike Kleina6434a52020-01-08 14:06:52 -06003603 r3 = load32 arg(0)
3614 r4 = bit_and r3 FF
3625 r4 = to_f32 r4
3636 r4 = mul_f32 r4 3B808081 (0.0039215689)
3647 r5 = load8 arg(1)
3658 r5 = to_f32 r5
3669 r5 = mul_f32 r5 3B808081 (0.0039215689)
36710 r6 = shr_i32 r3 24
Mike Klein57bdb242020-01-08 15:25:07 -060036811 r6 = to_f32 r6
36912 r6 = mul_f32 r6 3B808081 (0.0039215689)
37013 r6 = sub_f32 r0 r6
37114 r4 = mad_f32 r5 r6 r4
37215 r7 = shr_i32 r3 8
37316 r7 = bit_and r7 FF
37417 r7 = to_f32 r7
37518 r7 = mul_f32 r7 3B808081 (0.0039215689)
37619 r7 = mad_f32 r5 r6 r7
37720 r3 = shr_i32 r3 16
37821 r3 = bit_and r3 FF
37922 r3 = to_f32 r3
38023 r3 = mul_f32 r3 3B808081 (0.0039215689)
38124 r3 = mad_f32 r5 r6 r3
38225 r3 = mul_f32 r3 3D93DD98 (0.0722)
38326 r3 = mad_f32 r7 r2 r3
38427 r3 = mad_f32 r4 r1 r3
38528 r3 = mul_f32 r3 437F0000 (255)
38629 r3 = round r3
38730 store8 arg(1) r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600388
389RGBA_8888 over RGBA_8888
Mike Kleined9b1f12020-02-06 13:02:32 -060039048 values (originally 51):
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600391 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600392 v1 = bit_and v0 FF
393 v2 = to_f32 v1
394 v3 = mul_f32 v2 3B808081 (0.0039215689)
395 v4 = load32 arg(1)
396 v5 = bit_and v4 FF
397 v6 = to_f32 v5
398 v7 = mul_f32 v6 3B808081 (0.0039215689)
399 v8 = shr_i32 v0 24
Mike Klein57bdb242020-01-08 15:25:07 -0600400 v9 = to_f32 v8
401 v10 = mul_f32 v9 3B808081 (0.0039215689)
402 v11 = splat 3F800000 (1)
403 v12 = sub_f32 v11 v10
404 v13 = mad_f32 v7 v12 v3
405 v14 = mul_f32 v13 437F0000 (255)
406 v15 = round v14
407 v16 = shr_i32 v0 8
408 v17 = bit_and v16 FF
409 v18 = to_f32 v17
410 v19 = mul_f32 v18 3B808081 (0.0039215689)
411 v20 = shr_i32 v4 8
412 v21 = bit_and v20 FF
413 v22 = to_f32 v21
414 v23 = mul_f32 v22 3B808081 (0.0039215689)
415 v24 = mad_f32 v23 v12 v19
416 v25 = mul_f32 v24 437F0000 (255)
417 v26 = round v25
418 v27 = pack v15 v26 8
419 v28 = shr_i32 v0 16
420 v29 = bit_and v28 FF
421 v30 = to_f32 v29
422 v31 = mul_f32 v30 3B808081 (0.0039215689)
423 v32 = shr_i32 v4 16
424 v33 = bit_and v32 FF
425 v34 = to_f32 v33
426 v35 = mul_f32 v34 3B808081 (0.0039215689)
427 v36 = mad_f32 v35 v12 v31
428 v37 = mul_f32 v36 437F0000 (255)
429 v38 = round v37
430 v39 = shr_i32 v4 24
431 v40 = to_f32 v39
432 v41 = mul_f32 v40 3B808081 (0.0039215689)
433 v42 = mad_f32 v41 v12 v10
434 v43 = mul_f32 v42 437F0000 (255)
435 v44 = round v43
436 v45 = pack v38 v44 8
437 v46 = pack v27 v45 16
438 store32 arg(1) v46
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600439
Mike Klein57bdb242020-01-08 15:25:07 -06004408 registers, 48 instructions:
Mike Kleina6434a52020-01-08 14:06:52 -06004410 r0 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600442loop:
Mike Kleina6434a52020-01-08 14:06:52 -06004431 r1 = load32 arg(0)
4442 r2 = bit_and r1 FF
4453 r2 = to_f32 r2
4464 r2 = mul_f32 r2 3B808081 (0.0039215689)
4475 r3 = load32 arg(1)
4486 r4 = bit_and r3 FF
4497 r4 = to_f32 r4
4508 r4 = mul_f32 r4 3B808081 (0.0039215689)
4519 r5 = shr_i32 r1 24
Mike Klein57bdb242020-01-08 15:25:07 -060045210 r5 = to_f32 r5
45311 r5 = mul_f32 r5 3B808081 (0.0039215689)
45412 r6 = sub_f32 r0 r5
45513 r2 = mad_f32 r4 r6 r2
45614 r2 = mul_f32 r2 437F0000 (255)
45715 r2 = round r2
45816 r4 = shr_i32 r1 8
45917 r4 = bit_and r4 FF
46018 r4 = to_f32 r4
46119 r4 = mul_f32 r4 3B808081 (0.0039215689)
46220 r7 = shr_i32 r3 8
46321 r7 = bit_and r7 FF
46422 r7 = to_f32 r7
46523 r7 = mul_f32 r7 3B808081 (0.0039215689)
46624 r4 = mad_f32 r7 r6 r4
46725 r4 = mul_f32 r4 437F0000 (255)
46826 r4 = round r4
46927 r4 = pack r2 r4 8
47028 r1 = shr_i32 r1 16
47129 r1 = bit_and r1 FF
47230 r1 = to_f32 r1
47331 r1 = mul_f32 r1 3B808081 (0.0039215689)
47432 r2 = shr_i32 r3 16
47533 r2 = bit_and r2 FF
47634 r2 = to_f32 r2
47735 r2 = mul_f32 r2 3B808081 (0.0039215689)
47836 r1 = mad_f32 r2 r6 r1
47937 r1 = mul_f32 r1 437F0000 (255)
48038 r1 = round r1
48139 r3 = shr_i32 r3 24
48240 r3 = to_f32 r3
48341 r3 = mul_f32 r3 3B808081 (0.0039215689)
48442 r5 = mad_f32 r3 r6 r5
48543 r5 = mul_f32 r5 437F0000 (255)
48644 r5 = round r5
48745 r5 = pack r1 r5 8
48846 r5 = pack r4 r5 16
48947 store32 arg(1) r5
Mike Klein267f5072019-06-03 16:27:46 -0500490
Mike Klein397fc882019-06-20 11:37:10 -0500491I32 (Naive) 8888 over 8888
Mike Kleined9b1f12020-02-06 13:02:32 -060049232 values (originally 33):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500493 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600494 v1 = bit_and v0 FF
495 v2 = load32 arg(1)
496 v3 = bit_and v2 FF
497 v4 = shr_i32 v0 24
Mike Klein57bdb242020-01-08 15:25:07 -0600498 v5 = splat 100 (3.5873241e-43)
499 v6 = sub_i32 v5 v4
500 v7 = mul_i32 v3 v6
501 v8 = shr_i32 v7 8
502 v9 = add_i32 v1 v8
503 v10 = shr_i32 v0 8
504 v11 = bit_and v10 FF
505 v12 = shr_i32 v2 8
506 v13 = bit_and v12 FF
507 v14 = mul_i32 v13 v6
508 v15 = shr_i32 v14 8
509 v16 = add_i32 v11 v15
510 v17 = pack v9 v16 8
511 v18 = shr_i32 v0 16
512 v19 = bit_and v18 FF
513 v20 = shr_i32 v2 16
514 v21 = bit_and v20 FF
515 v22 = mul_i32 v21 v6
516 v23 = shr_i32 v22 8
517 v24 = add_i32 v19 v23
518 v25 = shr_i32 v2 24
519 v26 = mul_i32 v25 v6
520 v27 = shr_i32 v26 8
521 v28 = add_i32 v4 v27
522 v29 = pack v24 v28 8
523 v30 = pack v17 v29 16
524 store32 arg(1) v30
Mike Kleinaab45b52019-07-02 15:39:23 -0500525
Mike Klein57bdb242020-01-08 15:25:07 -06005268 registers, 32 instructions:
Mike Kleina6434a52020-01-08 14:06:52 -06005270 r0 = splat 100 (3.5873241e-43)
Mike Klein397fc882019-06-20 11:37:10 -0500528loop:
Mike Kleina6434a52020-01-08 14:06:52 -06005291 r1 = load32 arg(0)
5302 r2 = bit_and r1 FF
5313 r3 = load32 arg(1)
5324 r4 = bit_and r3 FF
5335 r5 = shr_i32 r1 24
Mike Klein57bdb242020-01-08 15:25:07 -06005346 r6 = sub_i32 r0 r5
5357 r4 = mul_i32 r4 r6
5368 r4 = shr_i32 r4 8
5379 r4 = add_i32 r2 r4
53810 r2 = shr_i32 r1 8
53911 r2 = bit_and r2 FF
54012 r7 = shr_i32 r3 8
54113 r7 = bit_and r7 FF
54214 r7 = mul_i32 r7 r6
54315 r7 = shr_i32 r7 8
54416 r7 = add_i32 r2 r7
54517 r7 = pack r4 r7 8
54618 r1 = shr_i32 r1 16
54719 r1 = bit_and r1 FF
54820 r4 = shr_i32 r3 16
54921 r4 = bit_and r4 FF
55022 r4 = mul_i32 r4 r6
55123 r4 = shr_i32 r4 8
55224 r4 = add_i32 r1 r4
55325 r3 = shr_i32 r3 24
55426 r6 = mul_i32 r3 r6
55527 r6 = shr_i32 r6 8
55628 r6 = add_i32 r5 r6
55729 r6 = pack r4 r6 8
55830 r6 = pack r7 r6 16
55931 store32 arg(1) r6
Mike Klein397fc882019-06-20 11:37:10 -0500560
Mike Klein7b7077c2019-06-03 17:10:59 -0500561I32 8888 over 8888
Mike Kleined9b1f12020-02-06 13:02:32 -060056228 values (originally 29):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500563 v0 = load32 arg(0)
Mike Kleina6434a52020-01-08 14:06:52 -0600564 v1 = bit_and v0 FF
565 v2 = load32 arg(1)
566 v3 = bit_and v2 FF
567 v4 = shr_i32 v0 24
568 v5 = splat 100 (3.5873241e-43)
569 v6 = sub_i32 v5 v4
570 v7 = mul_i16x2 v3 v6
571 v8 = shr_i32 v7 8
572 v9 = add_i32 v1 v8
573 v10 = bytes v0 2
574 v11 = bytes v2 2
575 v12 = mul_i16x2 v11 v6
576 v13 = shr_i32 v12 8
577 v14 = add_i32 v10 v13
578 v15 = pack v9 v14 8
579 v16 = bytes v0 3
580 v17 = bytes v2 3
581 v18 = mul_i16x2 v17 v6
582 v19 = shr_i32 v18 8
583 v20 = add_i32 v16 v19
584 v21 = shr_i32 v2 24
585 v22 = mul_i16x2 v21 v6
586 v23 = shr_i32 v22 8
587 v24 = add_i32 v4 v23
588 v25 = pack v20 v24 8
589 v26 = pack v15 v25 16
590 store32 arg(1) v26
Mike Kleinaab45b52019-07-02 15:39:23 -0500591
Mike Kleina6434a52020-01-08 14:06:52 -06005928 registers, 28 instructions:
5930 r0 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500594loop:
Mike Kleina6434a52020-01-08 14:06:52 -06005951 r1 = load32 arg(0)
5962 r2 = bit_and r1 FF
5973 r3 = load32 arg(1)
5984 r4 = bit_and r3 FF
5995 r5 = shr_i32 r1 24
6006 r6 = sub_i32 r0 r5
6017 r4 = mul_i16x2 r4 r6
6028 r4 = shr_i32 r4 8
6039 r4 = add_i32 r2 r4
60410 r2 = bytes r1 2
60511 r7 = bytes r3 2
60612 r7 = mul_i16x2 r7 r6
60713 r7 = shr_i32 r7 8
60814 r7 = add_i32 r2 r7
60915 r7 = pack r4 r7 8
61016 r1 = bytes r1 3
61117 r4 = bytes r3 3
61218 r4 = mul_i16x2 r4 r6
61319 r4 = shr_i32 r4 8
61420 r4 = add_i32 r1 r4
61521 r3 = shr_i32 r3 24
61622 r6 = mul_i16x2 r3 r6
61723 r6 = shr_i32 r6 8
61824 r6 = add_i32 r5 r6
61925 r6 = pack r4 r6 8
62026 r6 = pack r7 r6 16
62127 store32 arg(1) r6
Mike Klein821f5e82019-06-13 10:56:51 -0500622
623I32 (SWAR) 8888 over 8888
Mike Klein4bb61952020-02-06 15:48:46 -060062414 values (originally 15):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500625 v0 = load32 arg(0)
626 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500627 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500628 v3 = sub_i16x2 v2 v1
629 v4 = load32 arg(1)
Mike Kleina6434a52020-01-08 14:06:52 -0600630 v5 = bit_and v4 FF00FF
631 v6 = mul_i16x2 v5 v3
632 v7 = shr_i16x2 v6 8
633 v8 = shr_i16x2 v4 8
634 v9 = mul_i16x2 v8 v3
635 v10 = bit_and v9 FF00FF00
636 v11 = bit_or v7 v10
637 v12 = add_i32 v0 v11
638 store32 arg(1) v12
Mike Kleinaab45b52019-07-02 15:39:23 -0500639
Mike Kleina6434a52020-01-08 14:06:52 -06006405 registers, 14 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006410 r0 = splat 1000100 (2.3510604e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500642loop:
Mike Kleina6434a52020-01-08 14:06:52 -06006431 r1 = load32 arg(0)
6442 r2 = bytes r1 404
6453 r2 = sub_i16x2 r0 r2
6464 r3 = load32 arg(1)
6475 r4 = bit_and r3 FF00FF
6486 r4 = mul_i16x2 r4 r2
6497 r4 = shr_i16x2 r4 8
6508 r3 = shr_i16x2 r3 8
6519 r2 = mul_i16x2 r3 r2
65210 r2 = bit_and r2 FF00FF00
65311 r2 = bit_or r4 r2
65412 r2 = add_i32 r1 r2
65513 store32 arg(1) r2
Mike Klein7b7077c2019-06-03 17:10:59 -0500656
Mike Kleined9b1f12020-02-06 13:02:32 -06006576 values (originally 6):
Mike Klein0f61c122019-10-16 10:46:01 -0500658 v0 = splat 1 (1.4012985e-45)
659 v1 = splat 2 (2.8025969e-45)
Mike Kleinf9963112019-08-08 15:13:25 -0400660 v2 = add_i32 v0 v1
661 v3 = load32 arg(0)
662 v4 = mul_i32 v3 v2
663 store32 arg(0) v4
664
6652 registers, 6 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006660 r0 = splat 1 (1.4012985e-45)
6671 r1 = splat 2 (2.8025969e-45)
6682 r1 = add_i32 r0 r1
Mike Kleinf9963112019-08-08 15:13:25 -0400669loop:
Mike Kleinb5c43552020-01-07 11:39:30 -06006703 r0 = load32 arg(0)
6714 r0 = mul_i32 r0 r1
6725 store32 arg(0) r0
Mike Kleinf9963112019-08-08 15:13:25 -0400673
Mike Kleined9b1f12020-02-06 13:02:32 -060067422 values (originally 23):
Mike Kleina6434a52020-01-08 14:06:52 -0600675 v0 = load32 arg(0)
676 v1 = bit_and v0 FF
677 v2 = load32 arg(1)
678 v3 = bit_and v2 FF
679 v4 = add_i32 v1 v3
680 v5 = shr_i32 v0 8
681 v6 = bit_and v5 FF
682 v7 = shr_i32 v2 8
683 v8 = bit_and v7 FF
684 v9 = add_i32 v6 v8
685 v10 = pack v4 v9 8
686 v11 = shr_i32 v0 16
687 v12 = bit_and v11 FF
688 v13 = shr_i32 v2 16
689 v14 = bit_and v13 FF
690 v15 = add_i32 v12 v14
691 v16 = shr_i32 v0 24
Mike Klein57bdb242020-01-08 15:25:07 -0600692 v17 = shr_i32 v2 24
693 v18 = add_i32 v16 v17
694 v19 = pack v15 v18 8
695 v20 = pack v10 v19 16
696 store32 arg(1) v20
Mike Kleind48488b2019-10-22 12:27:58 -0500697
Mike Klein57bdb242020-01-08 15:25:07 -06006985 registers, 22 instructions:
Mike Kleind48488b2019-10-22 12:27:58 -0500699loop:
Mike Kleina6434a52020-01-08 14:06:52 -06007000 r0 = load32 arg(0)
7011 r1 = bit_and r0 FF
7022 r2 = load32 arg(1)
7033 r3 = bit_and r2 FF
7044 r3 = add_i32 r1 r3
7055 r1 = shr_i32 r0 8
7066 r1 = bit_and r1 FF
7077 r4 = shr_i32 r2 8
7088 r4 = bit_and r4 FF
7099 r4 = add_i32 r1 r4
71010 r4 = pack r3 r4 8
71111 r3 = shr_i32 r0 16
71212 r3 = bit_and r3 FF
71313 r1 = shr_i32 r2 16
71414 r1 = bit_and r1 FF
71515 r1 = add_i32 r3 r1
71616 r0 = shr_i32 r0 24
Mike Klein57bdb242020-01-08 15:25:07 -060071717 r2 = shr_i32 r2 24
71818 r2 = add_i32 r0 r2
71919 r2 = pack r1 r2 8
72020 r2 = pack r4 r2 16
72121 store32 arg(1) r2
Mike Kleind48488b2019-10-22 12:27:58 -0500722