blob: 5de1464f5e74afab5b514f31a3d7c92670ed2f68 [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein5caf7de2020-03-12 11:05:46 -0500214 values (originally 17):
Herb Derby43f76412020-03-11 16:54:35 -04003 v0 = load8 arg(0)
4 v1 = to_f32 v0
5 v2 = splat 3F800000 (1)
6 v3 = splat 3B808081 (0.0039215689)
7 v4 = fnma_f32 v1 v3 v2
8 v5 = load8 arg(1)
9 v6 = to_f32 v5
10 v7 = mul_f32 v6 v3
11 v8 = mul_f32 v1 v3
12 v9 = fma_f32 v7 v4 v8
Mike Klein5cdeb392020-02-10 12:10:36 -060013 v10 = splat 437F0000 (255)
Mike Klein5caf7de2020-03-12 11:05:46 -050014 v11 = mul_f32 v9 v10
15 v12 = round v11
16 store8 arg(1) v12
Mike Klein8c1e0ef2019-11-12 09:07:23 -060017
Mike Klein5caf7de2020-03-12 11:05:46 -0500186 registers, 14 instructions:
Herb Derby43f76412020-03-11 16:54:35 -0400190 r0 = splat 3F800000 (1)
201 r1 = splat 3B808081 (0.0039215689)
Mike Klein5cdeb392020-02-10 12:10:36 -0600212 r2 = splat 437F0000 (255)
Mike Klein8c1e0ef2019-11-12 09:07:23 -060022loop:
Mike Klein5caf7de2020-03-12 11:05:46 -0500233 r3 = load8 arg(0)
244 r3 = to_f32 r3
Herb Derby43f76412020-03-11 16:54:35 -0400255 r4 = fnma_f32 r3 r1 r0
Mike Klein5caf7de2020-03-12 11:05:46 -0500266 r5 = load8 arg(1)
277 r5 = to_f32 r5
Herb Derby43f76412020-03-11 16:54:35 -0400288 r5 = mul_f32 r5 r1
299 r3 = mul_f32 r3 r1
3010 r3 = fma_f32 r5 r4 r3
3111 r3 = mul_f32 r3 r2
3212 r3 = round r3
3313 store8 arg(1) r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -060034
Mike Klein5cdeb392020-02-10 12:10:36 -060035A8 over G8
Mike Klein5caf7de2020-03-12 11:05:46 -05003619 values (originally 24):
Herb Derby43f76412020-03-11 16:54:35 -040037 v0 = load8 arg(0)
38 v1 = to_f32 v0
39 v2 = splat 3F800000 (1)
40 v3 = splat 3B808081 (0.0039215689)
41 v4 = fnma_f32 v1 v3 v2
42 v5 = load8 arg(1)
43 v6 = to_f32 v5
44 v7 = mul_f32 v6 v3
45 v8 = mul_f32 v7 v4
46 v9 = splat 3D93DD98 (0.0722)
47 v10 = mul_f32 v8 v9
48 v11 = splat 3F371759 (0.71520001)
49 v12 = fma_f32 v8 v11 v10
50 v13 = splat 3E59B3D0 (0.21259999)
51 v14 = fma_f32 v8 v13 v12
52 v15 = splat 437F0000 (255)
53 v16 = mul_f32 v14 v15
54 v17 = round v16
55 store8 arg(1) v17
56
578 registers, 19 instructions:
580 r0 = splat 3F800000 (1)
591 r1 = splat 3B808081 (0.0039215689)
602 r2 = splat 3D93DD98 (0.0722)
613 r3 = splat 3F371759 (0.71520001)
624 r4 = splat 3E59B3D0 (0.21259999)
635 r5 = splat 437F0000 (255)
64loop:
656 r6 = load8 arg(0)
667 r6 = to_f32 r6
678 r6 = fnma_f32 r6 r1 r0
689 r7 = load8 arg(1)
6910 r7 = to_f32 r7
7011 r7 = mul_f32 r7 r1
7112 r6 = mul_f32 r7 r6
7213 r7 = mul_f32 r6 r2
7314 r7 = fma_f32 r6 r3 r7
7415 r7 = fma_f32 r6 r4 r7
7516 r7 = mul_f32 r7 r5
7617 r7 = round r7
7718 store8 arg(1) r7
78
79A8 over RGBA_8888
8039 values (originally 41):
81 v0 = load8 arg(0)
82 v1 = to_f32 v0
83 v2 = splat 3F800000 (1)
84 v3 = splat 3B808081 (0.0039215689)
85 v4 = fnma_f32 v1 v3 v2
86 v5 = load32 arg(1)
87 v6 = shr_i32 v5 24
88 v7 = to_f32 v6
89 v8 = mul_f32 v7 v3
90 v9 = mul_f32 v1 v3
91 v10 = fma_f32 v8 v4 v9
92 v11 = splat 437F0000 (255)
93 v12 = mul_f32 v10 v11
94 v13 = shr_i32 v5 16
95 v14 = splat FF (3.5733111e-43)
96 v15 = bit_and v14 v13
97 v16 = round v12
98 v17 = to_f32 v15
99 v18 = mul_f32 v17 v3
100 v19 = mul_f32 v18 v4
101 v20 = mul_f32 v19 v11
102 v21 = round v20
103 v22 = pack v21 v16 8
104 v23 = shr_i32 v5 8
105 v24 = bit_and v14 v23
106 v25 = to_f32 v24
107 v26 = mul_f32 v25 v3
108 v27 = mul_f32 v26 v4
109 v28 = mul_f32 v27 v11
110 v29 = round v28
111 v30 = bit_and v14 v5
112 v31 = to_f32 v30
113 v32 = mul_f32 v31 v3
114 v33 = mul_f32 v32 v4
115 v34 = mul_f32 v33 v11
116 v35 = round v34
117 v36 = pack v35 v29 8
118 v37 = pack v36 v22 16
119 store32 arg(1) v37
120
1218 registers, 39 instructions:
1220 r0 = splat 3F800000 (1)
1231 r1 = splat 3B808081 (0.0039215689)
1242 r2 = splat 437F0000 (255)
1253 r3 = splat FF (3.5733111e-43)
126loop:
1274 r4 = load8 arg(0)
1285 r4 = to_f32 r4
1296 r5 = fnma_f32 r4 r1 r0
1307 r6 = load32 arg(1)
1318 r7 = shr_i32 r6 24
1329 r7 = to_f32 r7
13310 r7 = mul_f32 r7 r1
13411 r4 = mul_f32 r4 r1
13512 r4 = fma_f32 r7 r5 r4
13613 r4 = mul_f32 r4 r2
13714 r7 = shr_i32 r6 16
13815 r7 = bit_and r3 r7
13916 r4 = round r4
14017 r7 = to_f32 r7
14118 r7 = mul_f32 r7 r1
14219 r7 = mul_f32 r7 r5
14320 r7 = mul_f32 r7 r2
14421 r7 = round r7
14522 r4 = pack r7 r4 8
14623 r7 = shr_i32 r6 8
14724 r7 = bit_and r3 r7
14825 r7 = to_f32 r7
14926 r7 = mul_f32 r7 r1
15027 r7 = mul_f32 r7 r5
15128 r7 = mul_f32 r7 r2
15229 r7 = round r7
15330 r6 = bit_and r3 r6
15431 r6 = to_f32 r6
15532 r6 = mul_f32 r6 r1
15633 r5 = mul_f32 r6 r5
15734 r5 = mul_f32 r5 r2
15835 r5 = round r5
15936 r7 = pack r5 r7 8
16037 r4 = pack r7 r4 16
16138 store32 arg(1) r4
162
163G8 over A8
16411 values (originally 15):
165 v0 = load8 arg(1)
166 v1 = to_f32 v0
167 v2 = splat 3B808081 (0.0039215689)
168 v3 = mul_f32 v1 v2
169 v4 = splat 0 (0)
170 v5 = splat 3F800000 (1)
171 v6 = fma_f32 v3 v4 v5
172 v7 = splat 437F0000 (255)
173 v8 = mul_f32 v6 v7
174 v9 = round v8
175 store8 arg(1) v9
176
1775 registers, 11 instructions:
1780 r0 = splat 3B808081 (0.0039215689)
1791 r1 = splat 0 (0)
1802 r2 = splat 3F800000 (1)
1813 r3 = splat 437F0000 (255)
182loop:
1834 r4 = load8 arg(1)
1845 r4 = to_f32 r4
1856 r4 = mul_f32 r4 r0
1867 r4 = fma_f32 r4 r1 r2
1878 r4 = mul_f32 r4 r3
1889 r4 = round r4
18910 store8 arg(1) r4
190
191G8 over G8
19219 values (originally 23):
193 v0 = load8 arg(1)
194 v1 = to_f32 v0
195 v2 = splat 3B808081 (0.0039215689)
196 v3 = mul_f32 v1 v2
Mike Klein5cdeb392020-02-10 12:10:36 -0600197 v4 = load8 arg(0)
198 v5 = to_f32 v4
Herb Derby43f76412020-03-11 16:54:35 -0400199 v6 = mul_f32 v5 v2
200 v7 = splat 0 (0)
201 v8 = fma_f32 v3 v7 v6
202 v9 = splat 3D93DD98 (0.0722)
203 v10 = mul_f32 v8 v9
204 v11 = splat 3F371759 (0.71520001)
205 v12 = fma_f32 v8 v11 v10
206 v13 = splat 3E59B3D0 (0.21259999)
207 v14 = fma_f32 v8 v13 v12
Mike Klein7c0332c2020-03-05 14:18:04 -0600208 v15 = splat 437F0000 (255)
Mike Klein5caf7de2020-03-12 11:05:46 -0500209 v16 = mul_f32 v14 v15
210 v17 = round v16
211 store8 arg(1) v17
Mike Klein5cdeb392020-02-10 12:10:36 -0600212
Mike Klein5caf7de2020-03-12 11:05:46 -05002138 registers, 19 instructions:
Mike Klein5cdeb392020-02-10 12:10:36 -06002140 r0 = splat 3B808081 (0.0039215689)
Herb Derby43f76412020-03-11 16:54:35 -04002151 r1 = splat 0 (0)
2162 r2 = splat 3D93DD98 (0.0722)
Mike Klein5cdeb392020-02-10 12:10:36 -06002173 r3 = splat 3F371759 (0.71520001)
Herb Derby43f76412020-03-11 16:54:35 -04002184 r4 = splat 3E59B3D0 (0.21259999)
Mike Klein5cdeb392020-02-10 12:10:36 -06002195 r5 = splat 437F0000 (255)
220loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05002216 r6 = load8 arg(1)
2227 r6 = to_f32 r6
2238 r6 = mul_f32 r6 r0
2249 r7 = load8 arg(0)
22510 r7 = to_f32 r7
Herb Derby43f76412020-03-11 16:54:35 -040022611 r7 = mul_f32 r7 r0
22712 r7 = fma_f32 r6 r1 r7
22813 r6 = mul_f32 r7 r2
Mike Klein5caf7de2020-03-12 11:05:46 -050022914 r6 = fma_f32 r7 r3 r6
Herb Derby43f76412020-03-11 16:54:35 -040023015 r6 = fma_f32 r7 r4 r6
Mike Klein5caf7de2020-03-12 11:05:46 -050023116 r6 = mul_f32 r6 r5
23217 r6 = round r6
23318 store8 arg(1) r6
Mike Klein5cdeb392020-02-10 12:10:36 -0600234
Mike Klein5cdeb392020-02-10 12:10:36 -0600235G8 over RGBA_8888
Mike Klein5caf7de2020-03-12 11:05:46 -050023639 values (originally 43):
Herb Derby43f76412020-03-11 16:54:35 -0400237 v0 = load32 arg(1)
238 v1 = shr_i32 v0 24
Mike Klein5cdeb392020-02-10 12:10:36 -0600239 v2 = to_f32 v1
Herb Derby43f76412020-03-11 16:54:35 -0400240 v3 = splat 3B808081 (0.0039215689)
241 v4 = mul_f32 v2 v3
242 v5 = splat 0 (0)
243 v6 = splat 3F800000 (1)
244 v7 = fma_f32 v4 v5 v6
245 v8 = splat 437F0000 (255)
246 v9 = mul_f32 v7 v8
247 v10 = shr_i32 v0 16
248 v11 = splat FF (3.5733111e-43)
249 v12 = bit_and v11 v10
250 v13 = to_f32 v12
251 v14 = mul_f32 v13 v3
252 v15 = load8 arg(0)
Mike Klein5caf7de2020-03-12 11:05:46 -0500253 v16 = to_f32 v15
Herb Derby43f76412020-03-11 16:54:35 -0400254 v17 = mul_f32 v16 v3
255 v18 = fma_f32 v14 v5 v17
256 v19 = round v9
257 v20 = mul_f32 v18 v8
258 v21 = round v20
259 v22 = pack v21 v19 8
260 v23 = shr_i32 v0 8
261 v24 = bit_and v11 v23
262 v25 = to_f32 v24
263 v26 = mul_f32 v25 v3
264 v27 = fma_f32 v26 v5 v17
265 v28 = mul_f32 v27 v8
266 v29 = round v28
267 v30 = bit_and v11 v0
Mike Klein5caf7de2020-03-12 11:05:46 -0500268 v31 = to_f32 v30
Herb Derby43f76412020-03-11 16:54:35 -0400269 v32 = mul_f32 v31 v3
270 v33 = fma_f32 v32 v5 v17
271 v34 = mul_f32 v33 v8
Mike Klein5caf7de2020-03-12 11:05:46 -0500272 v35 = round v34
Herb Derby43f76412020-03-11 16:54:35 -0400273 v36 = pack v35 v29 8
274 v37 = pack v36 v22 16
Mike Klein5caf7de2020-03-12 11:05:46 -0500275 store32 arg(1) v37
Mike Klein5cdeb392020-02-10 12:10:36 -0600276
Mike Klein5caf7de2020-03-12 11:05:46 -05002779 registers, 39 instructions:
Mike Klein5cdeb392020-02-10 12:10:36 -06002780 r0 = splat 3B808081 (0.0039215689)
Herb Derby43f76412020-03-11 16:54:35 -04002791 r1 = splat 0 (0)
2802 r2 = splat 3F800000 (1)
Mike Klein5cdeb392020-02-10 12:10:36 -06002813 r3 = splat 437F0000 (255)
Herb Derby43f76412020-03-11 16:54:35 -04002824 r4 = splat FF (3.5733111e-43)
Mike Klein5cdeb392020-02-10 12:10:36 -0600283loop:
Herb Derby43f76412020-03-11 16:54:35 -04002845 r5 = load32 arg(1)
2856 r6 = shr_i32 r5 24
2867 r6 = to_f32 r6
2878 r6 = mul_f32 r6 r0
2889 r6 = fma_f32 r6 r1 r2
28910 r6 = mul_f32 r6 r3
29011 r7 = shr_i32 r5 16
29112 r7 = bit_and r4 r7
29213 r7 = to_f32 r7
29314 r7 = mul_f32 r7 r0
29415 r8 = load8 arg(0)
29516 r8 = to_f32 r8
29617 r8 = mul_f32 r8 r0
29718 r7 = fma_f32 r7 r1 r8
29819 r6 = round r6
29920 r7 = mul_f32 r7 r3
30021 r7 = round r7
30122 r6 = pack r7 r6 8
30223 r7 = shr_i32 r5 8
30324 r7 = bit_and r4 r7
Mike Klein5caf7de2020-03-12 11:05:46 -050030425 r7 = to_f32 r7
30526 r7 = mul_f32 r7 r0
Herb Derby43f76412020-03-11 16:54:35 -040030627 r7 = fma_f32 r7 r1 r8
30728 r7 = mul_f32 r7 r3
30829 r7 = round r7
30930 r5 = bit_and r4 r5
31031 r5 = to_f32 r5
31132 r5 = mul_f32 r5 r0
31233 r8 = fma_f32 r5 r1 r8
31334 r8 = mul_f32 r8 r3
31435 r8 = round r8
31536 r7 = pack r8 r7 8
31637 r6 = pack r7 r6 16
Mike Klein5caf7de2020-03-12 11:05:46 -050031738 store32 arg(1) r6
Mike Klein5cdeb392020-02-10 12:10:36 -0600318
319RGBA_8888 over A8
Mike Klein5caf7de2020-03-12 11:05:46 -050032015 values (originally 33):
Herb Derby43f76412020-03-11 16:54:35 -0400321 v0 = load32 arg(0)
322 v1 = shr_i32 v0 24
323 v2 = to_f32 v1
324 v3 = splat 3F800000 (1)
325 v4 = splat 3B808081 (0.0039215689)
326 v5 = fnma_f32 v2 v4 v3
327 v6 = load8 arg(1)
328 v7 = to_f32 v6
329 v8 = mul_f32 v7 v4
330 v9 = mul_f32 v2 v4
331 v10 = fma_f32 v8 v5 v9
Mike Klein5cdeb392020-02-10 12:10:36 -0600332 v11 = splat 437F0000 (255)
Mike Klein5caf7de2020-03-12 11:05:46 -0500333 v12 = mul_f32 v10 v11
334 v13 = round v12
335 store8 arg(1) v13
Mike Klein5cdeb392020-02-10 12:10:36 -0600336
Mike Klein5caf7de2020-03-12 11:05:46 -05003376 registers, 15 instructions:
Herb Derby43f76412020-03-11 16:54:35 -04003380 r0 = splat 3F800000 (1)
3391 r1 = splat 3B808081 (0.0039215689)
Mike Klein5cdeb392020-02-10 12:10:36 -06003402 r2 = splat 437F0000 (255)
Mike Kleina6307322019-06-07 15:44:26 -0500341loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05003423 r3 = load32 arg(0)
3434 r3 = shr_i32 r3 24
3445 r3 = to_f32 r3
Herb Derby43f76412020-03-11 16:54:35 -04003456 r4 = fnma_f32 r3 r1 r0
Mike Klein5caf7de2020-03-12 11:05:46 -05003467 r5 = load8 arg(1)
3478 r5 = to_f32 r5
Herb Derby43f76412020-03-11 16:54:35 -04003489 r5 = mul_f32 r5 r1
34910 r3 = mul_f32 r3 r1
35011 r3 = fma_f32 r5 r4 r3
35112 r3 = mul_f32 r3 r2
35213 r3 = round r3
35314 store8 arg(1) r3
Mike Klein5cdeb392020-02-10 12:10:36 -0600354
355RGBA_8888 over G8
Mike Klein5caf7de2020-03-12 11:05:46 -050035634 values (originally 39):
Herb Derby43f76412020-03-11 16:54:35 -0400357 v0 = load32 arg(0)
358 v1 = shr_i32 v0 24
359 v2 = to_f32 v1
360 v3 = splat 3F800000 (1)
361 v4 = splat 3B808081 (0.0039215689)
362 v5 = fnma_f32 v2 v4 v3
363 v6 = shr_i32 v0 16
364 v7 = splat FF (3.5733111e-43)
365 v8 = bit_and v7 v6
366 v9 = load8 arg(1)
Mike Klein5cdeb392020-02-10 12:10:36 -0600367 v10 = to_f32 v9
Herb Derby43f76412020-03-11 16:54:35 -0400368 v11 = mul_f32 v10 v4
369 v12 = to_f32 v8
370 v13 = mul_f32 v12 v4
371 v14 = fma_f32 v11 v5 v13
372 v15 = splat 3D93DD98 (0.0722)
373 v16 = mul_f32 v14 v15
374 v17 = shr_i32 v0 8
375 v18 = bit_and v7 v17
376 v19 = to_f32 v18
377 v20 = mul_f32 v19 v4
378 v21 = fma_f32 v11 v5 v20
379 v22 = splat 3F371759 (0.71520001)
380 v23 = fma_f32 v21 v22 v16
381 v24 = bit_and v7 v0
382 v25 = to_f32 v24
383 v26 = mul_f32 v25 v4
384 v27 = fma_f32 v11 v5 v26
385 v28 = splat 3E59B3D0 (0.21259999)
386 v29 = fma_f32 v27 v28 v23
Mike Klein7c0332c2020-03-05 14:18:04 -0600387 v30 = splat 437F0000 (255)
Mike Klein5caf7de2020-03-12 11:05:46 -0500388 v31 = mul_f32 v29 v30
389 v32 = round v31
390 store8 arg(1) v32
Mike Klein5cdeb392020-02-10 12:10:36 -0600391
Mike Klein5caf7de2020-03-12 11:05:46 -050039212 registers, 34 instructions:
Herb Derby43f76412020-03-11 16:54:35 -04003930 r0 = splat 3F800000 (1)
3941 r1 = splat 3B808081 (0.0039215689)
3952 r2 = splat FF (3.5733111e-43)
3963 r3 = splat 3D93DD98 (0.0722)
Mike Klein5cdeb392020-02-10 12:10:36 -06003974 r4 = splat 3F371759 (0.71520001)
Herb Derby43f76412020-03-11 16:54:35 -04003985 r5 = splat 3E59B3D0 (0.21259999)
Mike Klein5cdeb392020-02-10 12:10:36 -06003996 r6 = splat 437F0000 (255)
400loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05004017 r7 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -04004028 r8 = shr_i32 r7 24
Mike Klein5caf7de2020-03-12 11:05:46 -05004039 r8 = to_f32 r8
Herb Derby43f76412020-03-11 16:54:35 -040040410 r8 = fnma_f32 r8 r1 r0
40511 r9 = shr_i32 r7 16
40612 r9 = bit_and r2 r9
40713 r10 = load8 arg(1)
40814 r10 = to_f32 r10
40915 r10 = mul_f32 r10 r1
41016 r9 = to_f32 r9
41117 r9 = mul_f32 r9 r1
41218 r9 = fma_f32 r10 r8 r9
41319 r9 = mul_f32 r9 r3
41420 r11 = shr_i32 r7 8
41521 r11 = bit_and r2 r11
41622 r11 = to_f32 r11
41723 r11 = mul_f32 r11 r1
41824 r11 = fma_f32 r10 r8 r11
41925 r9 = fma_f32 r11 r4 r9
42026 r7 = bit_and r2 r7
42127 r7 = to_f32 r7
42228 r7 = mul_f32 r7 r1
42329 r7 = fma_f32 r10 r8 r7
42430 r9 = fma_f32 r7 r5 r9
42531 r9 = mul_f32 r9 r6
42632 r9 = round r9
42733 store8 arg(1) r9
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600428
429RGBA_8888 over RGBA_8888
Mike Klein5caf7de2020-03-12 11:05:46 -050043051 values (originally 55):
Herb Derby43f76412020-03-11 16:54:35 -0400431 v0 = load32 arg(0)
432 v1 = shr_i32 v0 24
433 v2 = to_f32 v1
434 v3 = splat 3F800000 (1)
435 v4 = splat 3B808081 (0.0039215689)
436 v5 = fnma_f32 v2 v4 v3
Mike Klein5cdeb392020-02-10 12:10:36 -0600437 v6 = load32 arg(1)
Herb Derby43f76412020-03-11 16:54:35 -0400438 v7 = shr_i32 v6 24
Mike Klein5cdeb392020-02-10 12:10:36 -0600439 v8 = to_f32 v7
Herb Derby43f76412020-03-11 16:54:35 -0400440 v9 = mul_f32 v8 v4
441 v10 = mul_f32 v2 v4
442 v11 = fma_f32 v9 v5 v10
443 v12 = splat 437F0000 (255)
444 v13 = mul_f32 v11 v12
445 v14 = shr_i32 v6 16
446 v15 = splat FF (3.5733111e-43)
447 v16 = bit_and v15 v14
448 v17 = to_f32 v16
449 v18 = mul_f32 v17 v4
450 v19 = shr_i32 v0 16
451 v20 = bit_and v15 v19
452 v21 = to_f32 v20
453 v22 = mul_f32 v21 v4
454 v23 = fma_f32 v18 v5 v22
455 v24 = round v13
456 v25 = mul_f32 v23 v12
457 v26 = round v25
458 v27 = pack v26 v24 8
459 v28 = shr_i32 v6 8
460 v29 = bit_and v15 v28
461 v30 = to_f32 v29
462 v31 = mul_f32 v30 v4
463 v32 = shr_i32 v0 8
464 v33 = bit_and v15 v32
465 v34 = to_f32 v33
466 v35 = mul_f32 v34 v4
467 v36 = fma_f32 v31 v5 v35
468 v37 = bit_and v15 v6
469 v38 = to_f32 v37
470 v39 = mul_f32 v38 v4
471 v40 = bit_and v15 v0
472 v41 = to_f32 v40
473 v42 = mul_f32 v41 v4
474 v43 = fma_f32 v39 v5 v42
475 v44 = mul_f32 v36 v12
476 v45 = round v44
477 v46 = mul_f32 v43 v12
Mike Klein5caf7de2020-03-12 11:05:46 -0500478 v47 = round v46
Herb Derby43f76412020-03-11 16:54:35 -0400479 v48 = pack v47 v45 8
480 v49 = pack v48 v27 16
Mike Klein5caf7de2020-03-12 11:05:46 -0500481 store32 arg(1) v49
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600482
Herb Derby43f76412020-03-11 16:54:35 -040048310 registers, 51 instructions:
4840 r0 = splat 3F800000 (1)
4851 r1 = splat 3B808081 (0.0039215689)
4862 r2 = splat 437F0000 (255)
4873 r3 = splat FF (3.5733111e-43)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600488loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05004894 r4 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -04004905 r5 = shr_i32 r4 24
Mike Klein5caf7de2020-03-12 11:05:46 -05004916 r5 = to_f32 r5
Herb Derby43f76412020-03-11 16:54:35 -04004927 r6 = fnma_f32 r5 r1 r0
4938 r7 = load32 arg(1)
4949 r8 = shr_i32 r7 24
49510 r8 = to_f32 r8
49611 r8 = mul_f32 r8 r1
49712 r5 = mul_f32 r5 r1
49813 r5 = fma_f32 r8 r6 r5
49914 r5 = mul_f32 r5 r2
50015 r8 = shr_i32 r7 16
50116 r8 = bit_and r3 r8
50217 r8 = to_f32 r8
50318 r8 = mul_f32 r8 r1
50419 r9 = shr_i32 r4 16
50520 r9 = bit_and r3 r9
50621 r9 = to_f32 r9
50722 r9 = mul_f32 r9 r1
50823 r9 = fma_f32 r8 r6 r9
50924 r5 = round r5
51025 r9 = mul_f32 r9 r2
51126 r9 = round r9
51227 r5 = pack r9 r5 8
51328 r9 = shr_i32 r7 8
51429 r9 = bit_and r3 r9
51530 r9 = to_f32 r9
51631 r9 = mul_f32 r9 r1
51732 r8 = shr_i32 r4 8
51833 r8 = bit_and r3 r8
51934 r8 = to_f32 r8
52035 r8 = mul_f32 r8 r1
52136 r8 = fma_f32 r9 r6 r8
52237 r7 = bit_and r3 r7
52338 r7 = to_f32 r7
52439 r7 = mul_f32 r7 r1
52540 r4 = bit_and r3 r4
52641 r4 = to_f32 r4
52742 r4 = mul_f32 r4 r1
52843 r4 = fma_f32 r7 r6 r4
52944 r8 = mul_f32 r8 r2
53045 r8 = round r8
53146 r4 = mul_f32 r4 r2
53247 r4 = round r4
Mike Klein5caf7de2020-03-12 11:05:46 -050053348 r8 = pack r4 r8 8
Herb Derby43f76412020-03-11 16:54:35 -040053449 r5 = pack r8 r5 16
53550 store32 arg(1) r5
Mike Klein267f5072019-06-03 16:27:46 -0500536
Mike Klein397fc882019-06-20 11:37:10 -0500537I32 (Naive) 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060053833 values (originally 33):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500539 v0 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -0400540 v1 = shr_i32 v0 24
541 v2 = splat 100 (3.5873241e-43)
542 v3 = sub_i32 v2 v1
543 v4 = load32 arg(1)
544 v5 = shr_i32 v4 16
545 v6 = splat FF (3.5733111e-43)
546 v7 = bit_and v6 v5
547 v8 = mul_i32 v7 v3
Mike Klein5cdeb392020-02-10 12:10:36 -0600548 v9 = shr_i32 v8 8
Herb Derby43f76412020-03-11 16:54:35 -0400549 v10 = shr_i32 v0 16
550 v11 = bit_and v6 v10
551 v12 = add_i32 v11 v9
552 v13 = shr_i32 v4 24
553 v14 = mul_i32 v13 v3
554 v15 = shr_i32 v14 8
555 v16 = add_i32 v1 v15
556 v17 = pack v12 v16 8
557 v18 = shr_i32 v4 8
558 v19 = bit_and v6 v18
559 v20 = mul_i32 v19 v3
560 v21 = shr_i32 v20 8
561 v22 = shr_i32 v0 8
562 v23 = bit_and v6 v22
563 v24 = add_i32 v23 v21
564 v25 = bit_and v6 v4
565 v26 = mul_i32 v25 v3
566 v27 = shr_i32 v26 8
567 v28 = bit_and v6 v0
568 v29 = add_i32 v28 v27
569 v30 = pack v29 v24 8
570 v31 = pack v30 v17 16
Mike Klein5cdeb392020-02-10 12:10:36 -0600571 store32 arg(1) v31
Mike Kleinaab45b52019-07-02 15:39:23 -0500572
Herb Derby43f76412020-03-11 16:54:35 -04005738 registers, 33 instructions:
5740 r0 = splat 100 (3.5873241e-43)
5751 r1 = splat FF (3.5733111e-43)
Mike Klein397fc882019-06-20 11:37:10 -0500576loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06005772 r2 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -04005783 r3 = shr_i32 r2 24
5794 r4 = sub_i32 r0 r3
5805 r5 = load32 arg(1)
5816 r6 = shr_i32 r5 16
5827 r6 = bit_and r1 r6
5838 r6 = mul_i32 r6 r4
5849 r6 = shr_i32 r6 8
58510 r7 = shr_i32 r2 16
58611 r7 = bit_and r1 r7
58712 r6 = add_i32 r7 r6
58813 r7 = shr_i32 r5 24
58914 r7 = mul_i32 r7 r4
59015 r7 = shr_i32 r7 8
59116 r7 = add_i32 r3 r7
59217 r7 = pack r6 r7 8
59318 r6 = shr_i32 r5 8
59419 r6 = bit_and r1 r6
59520 r6 = mul_i32 r6 r4
59621 r6 = shr_i32 r6 8
59722 r3 = shr_i32 r2 8
59823 r3 = bit_and r1 r3
59924 r6 = add_i32 r3 r6
60025 r5 = bit_and r1 r5
60126 r4 = mul_i32 r5 r4
60227 r4 = shr_i32 r4 8
60328 r2 = bit_and r1 r2
60429 r4 = add_i32 r2 r4
60530 r6 = pack r4 r6 8
60631 r7 = pack r6 r7 16
Mike Klein5cdeb392020-02-10 12:10:36 -060060732 store32 arg(1) r7
Mike Klein397fc882019-06-20 11:37:10 -0500608
Mike Klein7b7077c2019-06-03 17:10:59 -0500609I32 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060061029 values (originally 29):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500611 v0 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -0400612 v1 = shr_i32 v0 24
613 v2 = splat 100 (3.5873241e-43)
614 v3 = sub_i32 v2 v1
615 v4 = load32 arg(1)
616 v5 = bytes v4 3
617 v6 = mul_i16x2 v5 v3
618 v7 = shr_i32 v6 8
619 v8 = bytes v0 3
620 v9 = add_i32 v8 v7
621 v10 = shr_i32 v4 24
622 v11 = mul_i16x2 v10 v3
623 v12 = shr_i32 v11 8
624 v13 = add_i32 v1 v12
625 v14 = pack v9 v13 8
626 v15 = bytes v4 2
627 v16 = mul_i16x2 v15 v3
628 v17 = shr_i32 v16 8
629 v18 = bytes v0 2
630 v19 = add_i32 v18 v17
631 v20 = splat FF (3.5733111e-43)
632 v21 = bit_and v4 v20
633 v22 = mul_i16x2 v21 v3
634 v23 = shr_i32 v22 8
635 v24 = bit_and v0 v20
636 v25 = add_i32 v24 v23
637 v26 = pack v25 v19 8
638 v27 = pack v26 v14 16
Mike Klein5cdeb392020-02-10 12:10:36 -0600639 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500640
Herb Derby43f76412020-03-11 16:54:35 -04006418 registers, 29 instructions:
6420 r0 = splat 100 (3.5873241e-43)
6431 r1 = splat FF (3.5733111e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500644loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06006452 r2 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -04006463 r3 = shr_i32 r2 24
6474 r4 = sub_i32 r0 r3
6485 r5 = load32 arg(1)
6496 r6 = bytes r5 3
6507 r6 = mul_i16x2 r6 r4
6518 r6 = shr_i32 r6 8
6529 r7 = bytes r2 3
65310 r6 = add_i32 r7 r6
65411 r7 = shr_i32 r5 24
65512 r7 = mul_i16x2 r7 r4
65613 r7 = shr_i32 r7 8
65714 r7 = add_i32 r3 r7
65815 r7 = pack r6 r7 8
65916 r6 = bytes r5 2
66017 r6 = mul_i16x2 r6 r4
66118 r6 = shr_i32 r6 8
66219 r3 = bytes r2 2
66320 r6 = add_i32 r3 r6
66421 r5 = bit_and r5 r1
66522 r4 = mul_i16x2 r5 r4
66623 r4 = shr_i32 r4 8
66724 r2 = bit_and r2 r1
66825 r4 = add_i32 r2 r4
66926 r6 = pack r4 r6 8
67027 r7 = pack r6 r7 16
Mike Klein5cdeb392020-02-10 12:10:36 -060067128 store32 arg(1) r7
Mike Klein821f5e82019-06-13 10:56:51 -0500672
673I32 (SWAR) 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060067415 values (originally 15):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500675 v0 = load32 arg(0)
676 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500677 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500678 v3 = sub_i16x2 v2 v1
679 v4 = load32 arg(1)
Herb Derby43f76412020-03-11 16:54:35 -0400680 v5 = shr_i16x2 v4 8
681 v6 = mul_i16x2 v5 v3
682 v7 = splat FF00FF (2.3418409e-38)
683 v8 = bit_clear v6 v7
684 v9 = bit_and v4 v7
Mike Klein5cdeb392020-02-10 12:10:36 -0600685 v10 = mul_i16x2 v9 v3
Herb Derby43f76412020-03-11 16:54:35 -0400686 v11 = shr_i16x2 v10 8
687 v12 = bit_or v11 v8
Mike Klein5cdeb392020-02-10 12:10:36 -0600688 v13 = add_i32 v0 v12
689 store32 arg(1) v13
Mike Kleinaab45b52019-07-02 15:39:23 -0500690
Mike Klein5cdeb392020-02-10 12:10:36 -06006916 registers, 15 instructions:
Mike Kleinb5c43552020-01-07 11:39:30 -06006920 r0 = splat 1000100 (2.3510604e-38)
Mike Klein5cdeb392020-02-10 12:10:36 -06006931 r1 = splat FF00FF (2.3418409e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500694loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06006952 r2 = load32 arg(0)
6963 r3 = bytes r2 404
6974 r3 = sub_i16x2 r0 r3
6985 r4 = load32 arg(1)
Herb Derby43f76412020-03-11 16:54:35 -04006996 r5 = shr_i16x2 r4 8
Mike Klein5cdeb392020-02-10 12:10:36 -06007007 r5 = mul_i16x2 r5 r3
Herb Derby43f76412020-03-11 16:54:35 -04007018 r5 = bit_clear r5 r1
7029 r4 = bit_and r4 r1
Mike Klein5cdeb392020-02-10 12:10:36 -060070310 r3 = mul_i16x2 r4 r3
Herb Derby43f76412020-03-11 16:54:35 -040070411 r3 = shr_i16x2 r3 8
70512 r5 = bit_or r3 r5
70613 r5 = add_i32 r2 r5
70714 store32 arg(1) r5
Mike Klein7b7077c2019-06-03 17:10:59 -0500708
Mike Kleined9b1f12020-02-06 13:02:32 -06007096 values (originally 6):
Herb Derby43f76412020-03-11 16:54:35 -0400710 v0 = splat 2 (2.8025969e-45)
711 v1 = splat 1 (1.4012985e-45)
712 v2 = add_i32 v1 v0
Mike Kleinf9963112019-08-08 15:13:25 -0400713 v3 = load32 arg(0)
714 v4 = mul_i32 v3 v2
715 store32 arg(0) v4
716
7172 registers, 6 instructions:
Herb Derby43f76412020-03-11 16:54:35 -04007180 r0 = splat 2 (2.8025969e-45)
7191 r1 = splat 1 (1.4012985e-45)
7202 r0 = add_i32 r1 r0
Mike Kleinf9963112019-08-08 15:13:25 -0400721loop:
Herb Derby43f76412020-03-11 16:54:35 -04007223 r1 = load32 arg(0)
7234 r1 = mul_i32 r1 r0
7245 store32 arg(0) r1
Mike Kleinf9963112019-08-08 15:13:25 -0400725
Mike Klein5cdeb392020-02-10 12:10:36 -060072623 values (originally 23):
Herb Derby43f76412020-03-11 16:54:35 -0400727 v0 = load32 arg(1)
728 v1 = shr_i32 v0 24
729 v2 = load32 arg(0)
730 v3 = shr_i32 v2 24
731 v4 = add_i32 v3 v1
732 v5 = shr_i32 v0 16
733 v6 = splat FF (3.5733111e-43)
734 v7 = bit_and v6 v5
735 v8 = shr_i32 v2 16
736 v9 = bit_and v6 v8
737 v10 = add_i32 v9 v7
738 v11 = pack v10 v4 8
739 v12 = shr_i32 v0 8
740 v13 = bit_and v6 v12
741 v14 = shr_i32 v2 8
742 v15 = bit_and v6 v14
743 v16 = add_i32 v15 v13
744 v17 = bit_and v6 v0
745 v18 = bit_and v6 v2
746 v19 = add_i32 v18 v17
747 v20 = pack v19 v16 8
748 v21 = pack v20 v11 16
Mike Klein5cdeb392020-02-10 12:10:36 -0600749 store32 arg(1) v21
Mike Kleind48488b2019-10-22 12:27:58 -0500750
Mike Klein5cdeb392020-02-10 12:10:36 -06007516 registers, 23 instructions:
7520 r0 = splat FF (3.5733111e-43)
Mike Kleind48488b2019-10-22 12:27:58 -0500753loop:
Herb Derby43f76412020-03-11 16:54:35 -04007541 r1 = load32 arg(1)
7552 r2 = shr_i32 r1 24
7563 r3 = load32 arg(0)
7574 r4 = shr_i32 r3 24
7585 r2 = add_i32 r4 r2
7596 r4 = shr_i32 r1 16
7607 r4 = bit_and r0 r4
7618 r5 = shr_i32 r3 16
Mike Klein5cdeb392020-02-10 12:10:36 -06007629 r5 = bit_and r0 r5
Herb Derby43f76412020-03-11 16:54:35 -040076310 r4 = add_i32 r5 r4
76411 r2 = pack r4 r2 8
76512 r4 = shr_i32 r1 8
Mike Klein5cdeb392020-02-10 12:10:36 -060076613 r4 = bit_and r0 r4
Herb Derby43f76412020-03-11 16:54:35 -040076714 r5 = shr_i32 r3 8
76815 r5 = bit_and r0 r5
76916 r4 = add_i32 r5 r4
77017 r1 = bit_and r0 r1
77118 r3 = bit_and r0 r3
77219 r1 = add_i32 r3 r1
77320 r4 = pack r1 r4 8
77421 r2 = pack r4 r2 16
77522 store32 arg(1) r2
Mike Kleind48488b2019-10-22 12:27:58 -0500776