blob: fd12acdd82c19e1076eba98f82f62c1e9329985c [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein5caf7de2020-03-12 11:05:46 -0500214 values (originally 17):
Herb Derby31f6d042020-04-14 19:04:58 -04003 v0 = splat 437F0000 (255)
4 v1 = splat 3B808081 (0.0039215689)
5 v2 = load8 arg(0)
6 v3 = to_f32 v2
7 v4 = mul_f32 v3 v1
8 v5 = splat 3F800000 (1)
9 v6 = fnma_f32 v3 v1 v5
10 v7 = load8 arg(1)
11 v8 = to_f32 v7
12 v9 = mul_f32 v8 v1
13 v10 = fma_f32 v9 v6 v4
14 v11 = mul_f32 v10 v0
Mike Klein5caf7de2020-03-12 11:05:46 -050015 v12 = round v11
16 store8 arg(1) v12
Mike Klein8c1e0ef2019-11-12 09:07:23 -060017
Mike Klein5caf7de2020-03-12 11:05:46 -0500186 registers, 14 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -0400190 r0 = splat 437F0000 (255)
Herb Derby43f76412020-03-11 16:54:35 -0400201 r1 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -0400212 r2 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -060022loop:
Mike Klein5caf7de2020-03-12 11:05:46 -0500233 r3 = load8 arg(0)
244 r3 = to_f32 r3
Herb Derby31f6d042020-04-14 19:04:58 -0400255 r4 = mul_f32 r3 r1
266 r3 = fnma_f32 r3 r1 r2
277 r5 = load8 arg(1)
288 r5 = to_f32 r5
299 r5 = mul_f32 r5 r1
3010 r4 = fma_f32 r5 r3 r4
3111 r4 = mul_f32 r4 r0
3212 r4 = round r4
3313 store8 arg(1) r4
Mike Klein8c1e0ef2019-11-12 09:07:23 -060034
Mike Klein5cdeb392020-02-10 12:10:36 -060035A8 over G8
Mike Klein5caf7de2020-03-12 11:05:46 -05003619 values (originally 24):
Herb Derby31f6d042020-04-14 19:04:58 -040037 v0 = splat 437F0000 (255)
38 v1 = splat 3D93DD98 (0.0722)
Herb Derby43f76412020-03-11 16:54:35 -040039 v2 = splat 3F800000 (1)
40 v3 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -040041 v4 = load8 arg(0)
42 v5 = to_f32 v4
43 v6 = fnma_f32 v5 v3 v2
44 v7 = load8 arg(1)
45 v8 = to_f32 v7
46 v9 = mul_f32 v8 v3
47 v10 = mul_f32 v9 v6
48 v11 = mul_f32 v10 v1
49 v12 = splat 3F371759 (0.71520001)
50 v13 = fma_f32 v10 v12 v11
51 v14 = splat 3E59B3D0 (0.21259999)
52 v15 = fma_f32 v10 v14 v13
53 v16 = mul_f32 v15 v0
Herb Derby43f76412020-03-11 16:54:35 -040054 v17 = round v16
55 store8 arg(1) v17
56
578 registers, 19 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -0400580 r0 = splat 437F0000 (255)
591 r1 = splat 3D93DD98 (0.0722)
602 r2 = splat 3F800000 (1)
613 r3 = splat 3B808081 (0.0039215689)
624 r4 = splat 3F371759 (0.71520001)
635 r5 = splat 3E59B3D0 (0.21259999)
Herb Derby43f76412020-03-11 16:54:35 -040064loop:
656 r6 = load8 arg(0)
667 r6 = to_f32 r6
Herb Derby31f6d042020-04-14 19:04:58 -0400678 r6 = fnma_f32 r6 r3 r2
Herb Derby43f76412020-03-11 16:54:35 -0400689 r7 = load8 arg(1)
6910 r7 = to_f32 r7
Herb Derby31f6d042020-04-14 19:04:58 -04007011 r7 = mul_f32 r7 r3
Herb Derby43f76412020-03-11 16:54:35 -04007112 r6 = mul_f32 r7 r6
Herb Derby31f6d042020-04-14 19:04:58 -04007213 r7 = mul_f32 r6 r1
7314 r7 = fma_f32 r6 r4 r7
7415 r7 = fma_f32 r6 r5 r7
7516 r7 = mul_f32 r7 r0
Herb Derby43f76412020-03-11 16:54:35 -04007617 r7 = round r7
7718 store8 arg(1) r7
78
79A8 over RGBA_8888
8039 values (originally 41):
Herb Derby31f6d042020-04-14 19:04:58 -040081 v0 = splat 437F0000 (255)
82 v1 = splat 3B808081 (0.0039215689)
83 v2 = load8 arg(0)
84 v3 = to_f32 v2
85 v4 = mul_f32 v3 v1
86 v5 = splat 3F800000 (1)
87 v6 = fnma_f32 v3 v1 v5
88 v7 = load32 arg(1)
89 v8 = shr_i32 v7 24
90 v9 = to_f32 v8
91 v10 = mul_f32 v9 v1
92 v11 = fma_f32 v10 v6 v4
93 v12 = mul_f32 v11 v0
94 v13 = round v12
95 v14 = shr_i32 v7 16
96 v15 = splat FF (3.5733111e-43)
97 v16 = bit_and v15 v14
98 v17 = to_f32 v16
99 v18 = mul_f32 v17 v1
100 v19 = mul_f32 v18 v6
101 v20 = mul_f32 v19 v0
Herb Derby43f76412020-03-11 16:54:35 -0400102 v21 = round v20
Herb Derby31f6d042020-04-14 19:04:58 -0400103 v22 = pack v21 v13 8
104 v23 = shr_i32 v7 8
105 v24 = bit_and v15 v23
Herb Derby43f76412020-03-11 16:54:35 -0400106 v25 = to_f32 v24
Herb Derby31f6d042020-04-14 19:04:58 -0400107 v26 = mul_f32 v25 v1
108 v27 = mul_f32 v26 v6
109 v28 = mul_f32 v27 v0
Herb Derby43f76412020-03-11 16:54:35 -0400110 v29 = round v28
Herb Derby31f6d042020-04-14 19:04:58 -0400111 v30 = bit_and v15 v7
Herb Derby43f76412020-03-11 16:54:35 -0400112 v31 = to_f32 v30
Herb Derby31f6d042020-04-14 19:04:58 -0400113 v32 = mul_f32 v31 v1
114 v33 = mul_f32 v32 v6
115 v34 = mul_f32 v33 v0
Herb Derby43f76412020-03-11 16:54:35 -0400116 v35 = round v34
117 v36 = pack v35 v29 8
118 v37 = pack v36 v22 16
119 store32 arg(1) v37
120
1218 registers, 39 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04001220 r0 = splat 437F0000 (255)
Herb Derby43f76412020-03-11 16:54:35 -04001231 r1 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -04001242 r2 = splat 3F800000 (1)
Herb Derby43f76412020-03-11 16:54:35 -04001253 r3 = splat FF (3.5733111e-43)
126loop:
1274 r4 = load8 arg(0)
1285 r4 = to_f32 r4
Herb Derby31f6d042020-04-14 19:04:58 -04001296 r5 = mul_f32 r4 r1
1307 r4 = fnma_f32 r4 r1 r2
1318 r6 = load32 arg(1)
1329 r7 = shr_i32 r6 24
13310 r7 = to_f32 r7
13411 r7 = mul_f32 r7 r1
13512 r5 = fma_f32 r7 r4 r5
13613 r5 = mul_f32 r5 r0
13714 r5 = round r5
13815 r7 = shr_i32 r6 16
13916 r7 = bit_and r3 r7
Herb Derby43f76412020-03-11 16:54:35 -040014017 r7 = to_f32 r7
14118 r7 = mul_f32 r7 r1
Herb Derby31f6d042020-04-14 19:04:58 -040014219 r7 = mul_f32 r7 r4
14320 r7 = mul_f32 r7 r0
Herb Derby43f76412020-03-11 16:54:35 -040014421 r7 = round r7
Herb Derby31f6d042020-04-14 19:04:58 -040014522 r5 = pack r7 r5 8
Herb Derby43f76412020-03-11 16:54:35 -040014623 r7 = shr_i32 r6 8
14724 r7 = bit_and r3 r7
14825 r7 = to_f32 r7
14926 r7 = mul_f32 r7 r1
Herb Derby31f6d042020-04-14 19:04:58 -040015027 r7 = mul_f32 r7 r4
15128 r7 = mul_f32 r7 r0
Herb Derby43f76412020-03-11 16:54:35 -040015229 r7 = round r7
15330 r6 = bit_and r3 r6
15431 r6 = to_f32 r6
15532 r6 = mul_f32 r6 r1
Herb Derby31f6d042020-04-14 19:04:58 -040015633 r4 = mul_f32 r6 r4
15734 r4 = mul_f32 r4 r0
15835 r4 = round r4
15936 r7 = pack r4 r7 8
16037 r5 = pack r7 r5 16
16138 store32 arg(1) r5
Herb Derby43f76412020-03-11 16:54:35 -0400162
163G8 over A8
16411 values (originally 15):
Herb Derby31f6d042020-04-14 19:04:58 -0400165 v0 = splat 437F0000 (255)
166 v1 = splat 3F800000 (1)
167 v2 = splat 0 (0)
168 v3 = splat 3B808081 (0.0039215689)
169 v4 = load8 arg(1)
170 v5 = to_f32 v4
171 v6 = mul_f32 v5 v3
172 v7 = fma_f32 v6 v2 v1
173 v8 = mul_f32 v7 v0
Herb Derby43f76412020-03-11 16:54:35 -0400174 v9 = round v8
175 store8 arg(1) v9
176
1775 registers, 11 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04001780 r0 = splat 437F0000 (255)
1791 r1 = splat 3F800000 (1)
1802 r2 = splat 0 (0)
1813 r3 = splat 3B808081 (0.0039215689)
Herb Derby43f76412020-03-11 16:54:35 -0400182loop:
1834 r4 = load8 arg(1)
1845 r4 = to_f32 r4
Herb Derby31f6d042020-04-14 19:04:58 -04001856 r4 = mul_f32 r4 r3
1867 r4 = fma_f32 r4 r2 r1
1878 r4 = mul_f32 r4 r0
Herb Derby43f76412020-03-11 16:54:35 -04001889 r4 = round r4
18910 store8 arg(1) r4
190
191G8 over G8
19219 values (originally 23):
Herb Derby31f6d042020-04-14 19:04:58 -0400193 v0 = splat 437F0000 (255)
194 v1 = splat 3D93DD98 (0.0722)
Herb Derby43f76412020-03-11 16:54:35 -0400195 v2 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -0400196 v3 = load8 arg(0)
197 v4 = to_f32 v3
198 v5 = mul_f32 v4 v2
199 v6 = splat 0 (0)
200 v7 = load8 arg(1)
201 v8 = to_f32 v7
202 v9 = mul_f32 v8 v2
203 v10 = fma_f32 v9 v6 v5
204 v11 = mul_f32 v10 v1
205 v12 = splat 3F371759 (0.71520001)
206 v13 = fma_f32 v10 v12 v11
207 v14 = splat 3E59B3D0 (0.21259999)
208 v15 = fma_f32 v10 v14 v13
209 v16 = mul_f32 v15 v0
Mike Klein5caf7de2020-03-12 11:05:46 -0500210 v17 = round v16
211 store8 arg(1) v17
Mike Klein5cdeb392020-02-10 12:10:36 -0600212
Mike Klein5caf7de2020-03-12 11:05:46 -05002138 registers, 19 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04002140 r0 = splat 437F0000 (255)
2151 r1 = splat 3D93DD98 (0.0722)
2162 r2 = splat 3B808081 (0.0039215689)
2173 r3 = splat 0 (0)
2184 r4 = splat 3F371759 (0.71520001)
2195 r5 = splat 3E59B3D0 (0.21259999)
Mike Klein5cdeb392020-02-10 12:10:36 -0600220loop:
Herb Derby31f6d042020-04-14 19:04:58 -04002216 r6 = load8 arg(0)
Mike Klein5caf7de2020-03-12 11:05:46 -05002227 r6 = to_f32 r6
Herb Derby31f6d042020-04-14 19:04:58 -04002238 r6 = mul_f32 r6 r2
2249 r7 = load8 arg(1)
Mike Klein5caf7de2020-03-12 11:05:46 -050022510 r7 = to_f32 r7
Herb Derby31f6d042020-04-14 19:04:58 -040022611 r7 = mul_f32 r7 r2
22712 r6 = fma_f32 r7 r3 r6
22813 r7 = mul_f32 r6 r1
22914 r7 = fma_f32 r6 r4 r7
23015 r7 = fma_f32 r6 r5 r7
23116 r7 = mul_f32 r7 r0
23217 r7 = round r7
23318 store8 arg(1) r7
Mike Klein5cdeb392020-02-10 12:10:36 -0600234
Mike Klein5cdeb392020-02-10 12:10:36 -0600235G8 over RGBA_8888
Mike Klein5caf7de2020-03-12 11:05:46 -050023639 values (originally 43):
Herb Derby31f6d042020-04-14 19:04:58 -0400237 v0 = splat 437F0000 (255)
238 v1 = splat 3F800000 (1)
239 v2 = splat 0 (0)
Herb Derby43f76412020-03-11 16:54:35 -0400240 v3 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -0400241 v4 = load32 arg(1)
242 v5 = shr_i32 v4 24
243 v6 = to_f32 v5
244 v7 = mul_f32 v6 v3
245 v8 = fma_f32 v7 v2 v1
246 v9 = mul_f32 v8 v0
247 v10 = round v9
248 v11 = load8 arg(0)
249 v12 = to_f32 v11
250 v13 = mul_f32 v12 v3
251 v14 = shr_i32 v4 16
252 v15 = splat FF (3.5733111e-43)
253 v16 = bit_and v15 v14
254 v17 = to_f32 v16
255 v18 = mul_f32 v17 v3
256 v19 = fma_f32 v18 v2 v13
257 v20 = mul_f32 v19 v0
Herb Derby43f76412020-03-11 16:54:35 -0400258 v21 = round v20
Herb Derby31f6d042020-04-14 19:04:58 -0400259 v22 = pack v21 v10 8
260 v23 = shr_i32 v4 8
261 v24 = bit_and v15 v23
Herb Derby43f76412020-03-11 16:54:35 -0400262 v25 = to_f32 v24
263 v26 = mul_f32 v25 v3
Herb Derby31f6d042020-04-14 19:04:58 -0400264 v27 = fma_f32 v26 v2 v13
265 v28 = mul_f32 v27 v0
Herb Derby43f76412020-03-11 16:54:35 -0400266 v29 = round v28
Herb Derby31f6d042020-04-14 19:04:58 -0400267 v30 = bit_and v15 v4
Mike Klein5caf7de2020-03-12 11:05:46 -0500268 v31 = to_f32 v30
Herb Derby43f76412020-03-11 16:54:35 -0400269 v32 = mul_f32 v31 v3
Herb Derby31f6d042020-04-14 19:04:58 -0400270 v33 = fma_f32 v32 v2 v13
271 v34 = mul_f32 v33 v0
Mike Klein5caf7de2020-03-12 11:05:46 -0500272 v35 = round v34
Herb Derby43f76412020-03-11 16:54:35 -0400273 v36 = pack v35 v29 8
274 v37 = pack v36 v22 16
Mike Klein5caf7de2020-03-12 11:05:46 -0500275 store32 arg(1) v37
Mike Klein5cdeb392020-02-10 12:10:36 -0600276
Mike Klein5caf7de2020-03-12 11:05:46 -05002779 registers, 39 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04002780 r0 = splat 437F0000 (255)
2791 r1 = splat 3F800000 (1)
2802 r2 = splat 0 (0)
2813 r3 = splat 3B808081 (0.0039215689)
Herb Derby43f76412020-03-11 16:54:35 -04002824 r4 = splat FF (3.5733111e-43)
Mike Klein5cdeb392020-02-10 12:10:36 -0600283loop:
Herb Derby43f76412020-03-11 16:54:35 -04002845 r5 = load32 arg(1)
2856 r6 = shr_i32 r5 24
2867 r6 = to_f32 r6
Herb Derby31f6d042020-04-14 19:04:58 -04002878 r6 = mul_f32 r6 r3
2889 r6 = fma_f32 r6 r2 r1
28910 r6 = mul_f32 r6 r0
29011 r6 = round r6
29112 r7 = load8 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -040029213 r7 = to_f32 r7
Herb Derby31f6d042020-04-14 19:04:58 -040029314 r7 = mul_f32 r7 r3
29415 r8 = shr_i32 r5 16
29516 r8 = bit_and r4 r8
29617 r8 = to_f32 r8
29718 r8 = mul_f32 r8 r3
29819 r8 = fma_f32 r8 r2 r7
29920 r8 = mul_f32 r8 r0
30021 r8 = round r8
30122 r6 = pack r8 r6 8
30223 r8 = shr_i32 r5 8
30324 r8 = bit_and r4 r8
30425 r8 = to_f32 r8
30526 r8 = mul_f32 r8 r3
30627 r8 = fma_f32 r8 r2 r7
30728 r8 = mul_f32 r8 r0
30829 r8 = round r8
Herb Derby43f76412020-03-11 16:54:35 -040030930 r5 = bit_and r4 r5
31031 r5 = to_f32 r5
Herb Derby31f6d042020-04-14 19:04:58 -040031132 r5 = mul_f32 r5 r3
31233 r7 = fma_f32 r5 r2 r7
31334 r7 = mul_f32 r7 r0
31435 r7 = round r7
31536 r8 = pack r7 r8 8
31637 r6 = pack r8 r6 16
Mike Klein5caf7de2020-03-12 11:05:46 -050031738 store32 arg(1) r6
Mike Klein5cdeb392020-02-10 12:10:36 -0600318
319RGBA_8888 over A8
Mike Klein5caf7de2020-03-12 11:05:46 -050032015 values (originally 33):
Herb Derby31f6d042020-04-14 19:04:58 -0400321 v0 = splat 437F0000 (255)
322 v1 = splat 3B808081 (0.0039215689)
323 v2 = load32 arg(0)
324 v3 = shr_i32 v2 24
325 v4 = to_f32 v3
326 v5 = mul_f32 v4 v1
327 v6 = splat 3F800000 (1)
328 v7 = fnma_f32 v4 v1 v6
329 v8 = load8 arg(1)
330 v9 = to_f32 v8
331 v10 = mul_f32 v9 v1
332 v11 = fma_f32 v10 v7 v5
333 v12 = mul_f32 v11 v0
Mike Klein5caf7de2020-03-12 11:05:46 -0500334 v13 = round v12
335 store8 arg(1) v13
Mike Klein5cdeb392020-02-10 12:10:36 -0600336
Mike Klein5caf7de2020-03-12 11:05:46 -05003376 registers, 15 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04003380 r0 = splat 437F0000 (255)
Herb Derby43f76412020-03-11 16:54:35 -04003391 r1 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -04003402 r2 = splat 3F800000 (1)
Mike Kleina6307322019-06-07 15:44:26 -0500341loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05003423 r3 = load32 arg(0)
3434 r3 = shr_i32 r3 24
3445 r3 = to_f32 r3
Herb Derby31f6d042020-04-14 19:04:58 -04003456 r4 = mul_f32 r3 r1
3467 r3 = fnma_f32 r3 r1 r2
3478 r5 = load8 arg(1)
3489 r5 = to_f32 r5
34910 r5 = mul_f32 r5 r1
35011 r4 = fma_f32 r5 r3 r4
35112 r4 = mul_f32 r4 r0
35213 r4 = round r4
35314 store8 arg(1) r4
Mike Klein5cdeb392020-02-10 12:10:36 -0600354
355RGBA_8888 over G8
Mike Klein5caf7de2020-03-12 11:05:46 -050035634 values (originally 39):
Herb Derby31f6d042020-04-14 19:04:58 -0400357 v0 = splat 437F0000 (255)
358 v1 = splat 3D93DD98 (0.0722)
359 v2 = splat 3B808081 (0.0039215689)
360 v3 = load32 arg(0)
361 v4 = shr_i32 v3 16
362 v5 = splat FF (3.5733111e-43)
363 v6 = bit_and v5 v4
364 v7 = to_f32 v6
365 v8 = mul_f32 v7 v2
366 v9 = splat 3F800000 (1)
367 v10 = shr_i32 v3 24
368 v11 = to_f32 v10
369 v12 = fnma_f32 v11 v2 v9
370 v13 = load8 arg(1)
371 v14 = to_f32 v13
372 v15 = mul_f32 v14 v2
373 v16 = fma_f32 v15 v12 v8
374 v17 = mul_f32 v16 v1
375 v18 = splat 3F371759 (0.71520001)
376 v19 = shr_i32 v3 8
377 v20 = bit_and v5 v19
378 v21 = to_f32 v20
379 v22 = mul_f32 v21 v2
380 v23 = fma_f32 v15 v12 v22
381 v24 = fma_f32 v23 v18 v17
382 v25 = splat 3E59B3D0 (0.21259999)
383 v26 = bit_and v5 v3
384 v27 = to_f32 v26
385 v28 = mul_f32 v27 v2
386 v29 = fma_f32 v15 v12 v28
387 v30 = fma_f32 v29 v25 v24
388 v31 = mul_f32 v30 v0
Mike Klein5caf7de2020-03-12 11:05:46 -0500389 v32 = round v31
390 store8 arg(1) v32
Mike Klein5cdeb392020-02-10 12:10:36 -0600391
Mike Klein5caf7de2020-03-12 11:05:46 -050039212 registers, 34 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04003930 r0 = splat 437F0000 (255)
3941 r1 = splat 3D93DD98 (0.0722)
3952 r2 = splat 3B808081 (0.0039215689)
3963 r3 = splat FF (3.5733111e-43)
3974 r4 = splat 3F800000 (1)
3985 r5 = splat 3F371759 (0.71520001)
3996 r6 = splat 3E59B3D0 (0.21259999)
Mike Klein5cdeb392020-02-10 12:10:36 -0600400loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05004017 r7 = load32 arg(0)
Herb Derby31f6d042020-04-14 19:04:58 -04004028 r8 = shr_i32 r7 16
4039 r8 = bit_and r3 r8
40410 r8 = to_f32 r8
40511 r8 = mul_f32 r8 r2
40612 r9 = shr_i32 r7 24
40713 r9 = to_f32 r9
40814 r9 = fnma_f32 r9 r2 r4
40915 r10 = load8 arg(1)
41016 r10 = to_f32 r10
41117 r10 = mul_f32 r10 r2
41218 r8 = fma_f32 r10 r9 r8
41319 r8 = mul_f32 r8 r1
Herb Derby43f76412020-03-11 16:54:35 -040041420 r11 = shr_i32 r7 8
Herb Derby31f6d042020-04-14 19:04:58 -040041521 r11 = bit_and r3 r11
Herb Derby43f76412020-03-11 16:54:35 -040041622 r11 = to_f32 r11
Herb Derby31f6d042020-04-14 19:04:58 -040041723 r11 = mul_f32 r11 r2
41824 r11 = fma_f32 r10 r9 r11
41925 r8 = fma_f32 r11 r5 r8
42026 r7 = bit_and r3 r7
Herb Derby43f76412020-03-11 16:54:35 -040042127 r7 = to_f32 r7
Herb Derby31f6d042020-04-14 19:04:58 -040042228 r7 = mul_f32 r7 r2
42329 r7 = fma_f32 r10 r9 r7
42430 r8 = fma_f32 r7 r6 r8
42531 r8 = mul_f32 r8 r0
42632 r8 = round r8
42733 store8 arg(1) r8
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600428
429RGBA_8888 over RGBA_8888
Mike Klein5caf7de2020-03-12 11:05:46 -050043051 values (originally 55):
Herb Derby31f6d042020-04-14 19:04:58 -0400431 v0 = splat 437F0000 (255)
432 v1 = splat 3B808081 (0.0039215689)
433 v2 = load32 arg(0)
434 v3 = shr_i32 v2 24
435 v4 = to_f32 v3
436 v5 = mul_f32 v4 v1
437 v6 = splat 3F800000 (1)
438 v7 = fnma_f32 v4 v1 v6
439 v8 = load32 arg(1)
440 v9 = shr_i32 v8 24
441 v10 = to_f32 v9
442 v11 = mul_f32 v10 v1
443 v12 = fma_f32 v11 v7 v5
444 v13 = mul_f32 v12 v0
445 v14 = round v13
446 v15 = shr_i32 v2 16
447 v16 = splat FF (3.5733111e-43)
448 v17 = bit_and v16 v15
449 v18 = to_f32 v17
450 v19 = mul_f32 v18 v1
451 v20 = shr_i32 v8 16
452 v21 = bit_and v16 v20
453 v22 = to_f32 v21
454 v23 = mul_f32 v22 v1
455 v24 = fma_f32 v23 v7 v19
456 v25 = mul_f32 v24 v0
Herb Derby43f76412020-03-11 16:54:35 -0400457 v26 = round v25
Herb Derby31f6d042020-04-14 19:04:58 -0400458 v27 = pack v26 v14 8
459 v28 = shr_i32 v2 8
460 v29 = bit_and v16 v28
Herb Derby43f76412020-03-11 16:54:35 -0400461 v30 = to_f32 v29
Herb Derby31f6d042020-04-14 19:04:58 -0400462 v31 = mul_f32 v30 v1
463 v32 = shr_i32 v8 8
464 v33 = bit_and v16 v32
Herb Derby43f76412020-03-11 16:54:35 -0400465 v34 = to_f32 v33
Herb Derby31f6d042020-04-14 19:04:58 -0400466 v35 = mul_f32 v34 v1
467 v36 = fma_f32 v35 v7 v31
468 v37 = mul_f32 v36 v0
469 v38 = round v37
470 v39 = bit_and v16 v2
471 v40 = to_f32 v39
472 v41 = mul_f32 v40 v1
473 v42 = bit_and v16 v8
474 v43 = to_f32 v42
475 v44 = mul_f32 v43 v1
476 v45 = fma_f32 v44 v7 v41
477 v46 = mul_f32 v45 v0
Mike Klein5caf7de2020-03-12 11:05:46 -0500478 v47 = round v46
Herb Derby31f6d042020-04-14 19:04:58 -0400479 v48 = pack v47 v38 8
Herb Derby43f76412020-03-11 16:54:35 -0400480 v49 = pack v48 v27 16
Mike Klein5caf7de2020-03-12 11:05:46 -0500481 store32 arg(1) v49
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600482
Herb Derby43f76412020-03-11 16:54:35 -040048310 registers, 51 instructions:
Herb Derby31f6d042020-04-14 19:04:58 -04004840 r0 = splat 437F0000 (255)
Herb Derby43f76412020-03-11 16:54:35 -04004851 r1 = splat 3B808081 (0.0039215689)
Herb Derby31f6d042020-04-14 19:04:58 -04004862 r2 = splat 3F800000 (1)
Herb Derby43f76412020-03-11 16:54:35 -04004873 r3 = splat FF (3.5733111e-43)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600488loop:
Mike Klein5caf7de2020-03-12 11:05:46 -05004894 r4 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -04004905 r5 = shr_i32 r4 24
Mike Klein5caf7de2020-03-12 11:05:46 -05004916 r5 = to_f32 r5
Herb Derby31f6d042020-04-14 19:04:58 -04004927 r6 = mul_f32 r5 r1
4938 r5 = fnma_f32 r5 r1 r2
4949 r7 = load32 arg(1)
49510 r8 = shr_i32 r7 24
49611 r8 = to_f32 r8
49712 r8 = mul_f32 r8 r1
49813 r6 = fma_f32 r8 r5 r6
49914 r6 = mul_f32 r6 r0
50015 r6 = round r6
50116 r8 = shr_i32 r4 16
50217 r8 = bit_and r3 r8
50318 r8 = to_f32 r8
50419 r8 = mul_f32 r8 r1
50520 r9 = shr_i32 r7 16
50621 r9 = bit_and r3 r9
50722 r9 = to_f32 r9
50823 r9 = mul_f32 r9 r1
50924 r8 = fma_f32 r9 r5 r8
51025 r8 = mul_f32 r8 r0
51126 r8 = round r8
51227 r6 = pack r8 r6 8
51328 r8 = shr_i32 r4 8
51429 r8 = bit_and r3 r8
51530 r8 = to_f32 r8
51631 r8 = mul_f32 r8 r1
51732 r9 = shr_i32 r7 8
51833 r9 = bit_and r3 r9
51934 r9 = to_f32 r9
52035 r9 = mul_f32 r9 r1
52136 r8 = fma_f32 r9 r5 r8
52237 r8 = mul_f32 r8 r0
52338 r8 = round r8
52439 r4 = bit_and r3 r4
52540 r4 = to_f32 r4
52641 r4 = mul_f32 r4 r1
52742 r7 = bit_and r3 r7
52843 r7 = to_f32 r7
52944 r7 = mul_f32 r7 r1
53045 r4 = fma_f32 r7 r5 r4
53146 r4 = mul_f32 r4 r0
Herb Derby43f76412020-03-11 16:54:35 -040053247 r4 = round r4
Mike Klein5caf7de2020-03-12 11:05:46 -050053348 r8 = pack r4 r8 8
Herb Derby31f6d042020-04-14 19:04:58 -040053449 r6 = pack r8 r6 16
53550 store32 arg(1) r6
Mike Klein267f5072019-06-03 16:27:46 -0500536
Mike Klein397fc882019-06-20 11:37:10 -0500537I32 (Naive) 8888 over 8888
Mike Klein5cdeb392020-02-10 12:10:36 -060053833 values (originally 33):
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500539 v0 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -0400540 v1 = shr_i32 v0 24
541 v2 = splat 100 (3.5873241e-43)
542 v3 = sub_i32 v2 v1
543 v4 = load32 arg(1)
Herb Derby31f6d042020-04-14 19:04:58 -0400544 v5 = shr_i32 v4 24
545 v6 = mul_i32 v5 v3
546 v7 = shr_i32 v6 8
547 v8 = add_i32 v1 v7
548 v9 = shr_i32 v4 16
549 v10 = splat FF (3.5733111e-43)
550 v11 = bit_and v10 v9
551 v12 = mul_i32 v11 v3
552 v13 = shr_i32 v12 8
553 v14 = shr_i32 v0 16
554 v15 = bit_and v10 v14
555 v16 = add_i32 v15 v13
556 v17 = pack v16 v8 8
Herb Derby43f76412020-03-11 16:54:35 -0400557 v18 = shr_i32 v4 8
Herb Derby31f6d042020-04-14 19:04:58 -0400558 v19 = bit_and v10 v18
Herb Derby43f76412020-03-11 16:54:35 -0400559 v20 = mul_i32 v19 v3
560 v21 = shr_i32 v20 8
561 v22 = shr_i32 v0 8
Herb Derby31f6d042020-04-14 19:04:58 -0400562 v23 = bit_and v10 v22
Herb Derby43f76412020-03-11 16:54:35 -0400563 v24 = add_i32 v23 v21
Herb Derby31f6d042020-04-14 19:04:58 -0400564 v25 = bit_and v10 v4
Herb Derby43f76412020-03-11 16:54:35 -0400565 v26 = mul_i32 v25 v3
566 v27 = shr_i32 v26 8
Herb Derby31f6d042020-04-14 19:04:58 -0400567 v28 = bit_and v10 v0
Herb Derby43f76412020-03-11 16:54:35 -0400568 v29 = add_i32 v28 v27
569 v30 = pack v29 v24 8
570 v31 = pack v30 v17 16
Mike Klein5cdeb392020-02-10 12:10:36 -0600571 store32 arg(1) v31
Mike Kleinaab45b52019-07-02 15:39:23 -0500572
Herb Derby43f76412020-03-11 16:54:35 -04005738 registers, 33 instructions:
5740 r0 = splat 100 (3.5873241e-43)
5751 r1 = splat FF (3.5733111e-43)
Mike Klein397fc882019-06-20 11:37:10 -0500576loop:
Mike Klein5cdeb392020-02-10 12:10:36 -06005772 r2 = load32 arg(0)
Herb Derby43f76412020-03-11 16:54:35 -04005783 r3 = shr_i32 r2 24
5794 r4 = sub_i32 r0 r3
5805 r5 = load32 arg(1)
Herb Derby31f6d042020-04-14 19:04:58 -04005816 r6 = shr_i32 r5 24
5827 r6 = mul_i32 r6 r4
5838 r6 = shr_i32 r6 8
5849 r6 = add_i32 r3 r6
58510 r3 = shr_i32 r5 16
58611 r3 = bit_and r1 r3
58712 r3 = mul_i32 r3 r4
58813 r3 = shr_i32 r3 8
58914 r7 = shr_i32 r2 16
59015 r7 = bit_and r1 r7
59116 r3 = add_i32 r7 r3
59217 r6 = pack r3 r6 8
59318 r3 = shr_i32 r5 8
59419 r3 = bit_and r1 r3
59520 r3 = mul_i32 r3 r4
59621 r3 = shr_i32 r3 8
59722 r7 = shr_i32 r2 8
59823 r7 = bit_and r1 r7
59924 r3 = add_i32 r7 r3
Herb Derby43f76412020-03-11 16:54:35 -040060025 r5 = bit_and r1 r5
60126 r4 = mul_i32 r5 r4
60227 r4 = shr_i32 r4 8
60328 r2 = bit_and r1 r2
60429 r4 = add_i32 r2 r4
Herb Derby31f6d042020-04-14 19:04:58 -040060530 r3 = pack r4 r3 8
60631 r6 = pack r3 r6 16
60732 store32 arg(1) r6
Mike Klein397fc882019-06-20 11:37:10 -0500608
Mike Klein5cdeb392020-02-10 12:10:36 -060060923 values (originally 23):
Herb Derby43f76412020-03-11 16:54:35 -0400610 v0 = load32 arg(1)
611 v1 = shr_i32 v0 24
612 v2 = load32 arg(0)
613 v3 = shr_i32 v2 24
614 v4 = add_i32 v3 v1
615 v5 = shr_i32 v0 16
616 v6 = splat FF (3.5733111e-43)
617 v7 = bit_and v6 v5
618 v8 = shr_i32 v2 16
619 v9 = bit_and v6 v8
620 v10 = add_i32 v9 v7
621 v11 = pack v10 v4 8
622 v12 = shr_i32 v0 8
623 v13 = bit_and v6 v12
624 v14 = shr_i32 v2 8
625 v15 = bit_and v6 v14
626 v16 = add_i32 v15 v13
627 v17 = bit_and v6 v0
628 v18 = bit_and v6 v2
629 v19 = add_i32 v18 v17
630 v20 = pack v19 v16 8
631 v21 = pack v20 v11 16
Mike Klein5cdeb392020-02-10 12:10:36 -0600632 store32 arg(1) v21
Mike Kleind48488b2019-10-22 12:27:58 -0500633
Mike Klein5cdeb392020-02-10 12:10:36 -06006346 registers, 23 instructions:
6350 r0 = splat FF (3.5733111e-43)
Mike Kleind48488b2019-10-22 12:27:58 -0500636loop:
Herb Derby43f76412020-03-11 16:54:35 -04006371 r1 = load32 arg(1)
6382 r2 = shr_i32 r1 24
6393 r3 = load32 arg(0)
6404 r4 = shr_i32 r3 24
6415 r2 = add_i32 r4 r2
6426 r4 = shr_i32 r1 16
6437 r4 = bit_and r0 r4
6448 r5 = shr_i32 r3 16
Mike Klein5cdeb392020-02-10 12:10:36 -06006459 r5 = bit_and r0 r5
Herb Derby43f76412020-03-11 16:54:35 -040064610 r4 = add_i32 r5 r4
64711 r2 = pack r4 r2 8
64812 r4 = shr_i32 r1 8
Mike Klein5cdeb392020-02-10 12:10:36 -060064913 r4 = bit_and r0 r4
Herb Derby43f76412020-03-11 16:54:35 -040065014 r5 = shr_i32 r3 8
65115 r5 = bit_and r0 r5
65216 r4 = add_i32 r5 r4
65317 r1 = bit_and r0 r1
65418 r3 = bit_and r0 r3
65519 r1 = add_i32 r3 r1
65620 r4 = pack r1 r4 8
65721 r2 = pack r4 r2 16
65822 store32 arg(1) r2
Mike Kleind48488b2019-10-22 12:27:58 -0500659