blob: 3c32654129ba61bf26a4c76b090e95d304d7546a [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein22ea7e92019-06-10 12:05:48 -0500217 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -05003☠️ v0 = splat 0 (0)
4 v1 = load8 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -05005 v2 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -05006 v3 = to_f32 v1
7 v4 = mul_f32 v2 v3
8 v5 = load8 arg(1)
9 v6 = to_f32 v5
10 v7 = mul_f32 v2 v6
Mike Klein5e533c92019-07-22 13:44:54 -050011 v8 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050012 v9 = sub_f32 v8 v4
13☠️ v10 = mul_f32 v0 v9
14 v11 = mad_f32 v7 v9 v4
Mike Klein5e533c92019-07-22 13:44:54 -050015 v12 = splat 437F0000 (255)
16 v13 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050017 v14 = mad_f32 v11 v12 v13
18 v15 = to_i32 v14
19 store8 arg(1) v15
Mike Klein22ea7e92019-06-10 12:05:48 -050020
Mike Klein754bad32019-06-05 10:47:46 -0500217 registers, 15 instructions:
Mike Kleinc2d9a312019-06-05 14:12:16 +000022r0 = splat 3B808081 (0.0039215689)
Mike Klein267f5072019-06-03 16:27:46 -050023r1 = splat 3F800000 (1)
Mike Klein267f5072019-06-03 16:27:46 -050024r2 = splat 437F0000 (255)
Mike Klein754bad32019-06-05 10:47:46 -050025r3 = splat 3F000000 (0.5)
26loop:
27r4 = load8 arg(0)
Mike Klein9656dce2019-06-04 11:33:25 -050028r4 = to_f32 r4
Mike Klein754bad32019-06-05 10:47:46 -050029r4 = mul_f32 r0 r4
Mike Klein9656dce2019-06-04 11:33:25 -050030r5 = load8 arg(1)
31r5 = to_f32 r5
Mike Klein754bad32019-06-05 10:47:46 -050032r5 = mul_f32 r0 r5
33r6 = sub_f32 r1 r4
Mike Klein0c334662019-06-23 15:52:37 -040034r4 = mad_f32 r5 r6 r4
35r4 = mad_f32 r4 r2 r3
36r4 = to_i32 r4
37store8 arg(1) r4
Mike Klein267f5072019-06-03 16:27:46 -050038
Mike Klein754bad32019-06-05 10:47:46 -050039A8 over G8
Mike Klein22ea7e92019-06-10 12:05:48 -05004023 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -050041☠️ v0 = splat 0 (0)
42 v1 = load8 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -050043 v2 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050044 v3 = to_f32 v1
45 v4 = mul_f32 v2 v3
46 v5 = load8 arg(1)
47 v6 = to_f32 v5
48 v7 = mul_f32 v2 v6
Mike Klein5e533c92019-07-22 13:44:54 -050049 v8 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050050 v9 = sub_f32 v8 v4
51 v10 = mul_f32 v7 v9
52☠️ v11 = mad_f32 v8 v9 v4
Mike Klein5e533c92019-07-22 13:44:54 -050053 v12 = splat 3E59B3D0 (0.21259999)
54 v13 = splat 3F371759 (0.71520001)
55 v14 = splat 3D93DD98 (0.0722)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050056 v15 = mul_f32 v10 v14
57 v16 = mad_f32 v10 v13 v15
58 v17 = mad_f32 v10 v12 v16
Mike Klein5e533c92019-07-22 13:44:54 -050059 v18 = splat 437F0000 (255)
60 v19 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050061 v20 = mad_f32 v17 v18 v19
62 v21 = to_i32 v20
63 store8 arg(1) v21
Mike Klein22ea7e92019-06-10 12:05:48 -050064
Mike Klein754bad32019-06-05 10:47:46 -0500659 registers, 21 instructions:
66r0 = splat 3B808081 (0.0039215689)
67r1 = splat 3F800000 (1)
68r2 = splat 3E59B3D0 (0.21259999)
69r3 = splat 3F371759 (0.71520001)
70r4 = splat 3D93DD98 (0.0722)
71r5 = splat 437F0000 (255)
72r6 = splat 3F000000 (0.5)
73loop:
74r7 = load8 arg(0)
75r7 = to_f32 r7
76r7 = mul_f32 r0 r7
77r8 = load8 arg(1)
78r8 = to_f32 r8
79r8 = mul_f32 r0 r8
80r7 = sub_f32 r1 r7
81r7 = mul_f32 r8 r7
82r8 = mul_f32 r7 r4
83r8 = mad_f32 r7 r3 r8
84r8 = mad_f32 r7 r2 r8
85r8 = mad_f32 r8 r5 r6
86r8 = to_i32 r8
87store8 arg(1) r8
88
89A8 over RGBA_8888
Mike Klein22ea7e92019-06-10 12:05:48 -05009039 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -050091☠️ v0 = splat 0 (0)
92 v1 = load8 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -050093 v2 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050094 v3 = to_f32 v1
95 v4 = mul_f32 v2 v3
96 v5 = load32 arg(1)
Mike Klein5e533c92019-07-22 13:44:54 -050097 v6 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -050098 v7 = extract v5 0 v6
99 v8 = to_f32 v7
100 v9 = mul_f32 v2 v8
101 v10 = extract v5 8 v6
102 v11 = to_f32 v10
103 v12 = mul_f32 v2 v11
104 v13 = extract v5 16 v6
105 v14 = to_f32 v13
106 v15 = mul_f32 v2 v14
107 v16 = extract v5 24 v6
108 v17 = to_f32 v16
109 v18 = mul_f32 v2 v17
Mike Klein5e533c92019-07-22 13:44:54 -0500110 v19 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500111 v20 = sub_f32 v19 v4
112 v21 = mul_f32 v9 v20
113 v22 = mul_f32 v12 v20
114 v23 = mul_f32 v15 v20
115 v24 = mad_f32 v18 v20 v4
Mike Klein5e533c92019-07-22 13:44:54 -0500116 v25 = splat 437F0000 (255)
117 v26 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500118 v27 = mad_f32 v21 v25 v26
119 v28 = to_i32 v27
120 v29 = mad_f32 v22 v25 v26
121 v30 = to_i32 v29
122 v31 = mad_f32 v23 v25 v26
123 v32 = to_i32 v31
124 v33 = mad_f32 v24 v25 v26
125 v34 = to_i32 v33
126 v35 = pack v28 v30 8
127 v36 = pack v32 v34 8
128 v37 = pack v35 v36 16
129 store32 arg(1) v37
Mike Klein22ea7e92019-06-10 12:05:48 -0500130
Mike Kleina6307322019-06-07 15:44:26 -050013111 registers, 38 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500132r0 = splat 3B808081 (0.0039215689)
Mike Kleina6307322019-06-07 15:44:26 -0500133r1 = splat FF (3.5733111e-43)
134r2 = splat 3F800000 (1)
135r3 = splat 437F0000 (255)
136r4 = splat 3F000000 (0.5)
Mike Klein754bad32019-06-05 10:47:46 -0500137loop:
Mike Kleina6307322019-06-07 15:44:26 -0500138r5 = load8 arg(0)
Mike Klein9656dce2019-06-04 11:33:25 -0500139r5 = to_f32 r5
Mike Klein754bad32019-06-05 10:47:46 -0500140r5 = mul_f32 r0 r5
Mike Kleina6307322019-06-07 15:44:26 -0500141r6 = load32 arg(1)
142r7 = extract r6 0 r1
143r7 = to_f32 r7
144r7 = mul_f32 r0 r7
145r8 = extract r6 8 r1
146r8 = to_f32 r8
147r8 = mul_f32 r0 r8
148r9 = extract r6 16 r1
149r9 = to_f32 r9
150r9 = mul_f32 r0 r9
151r6 = extract r6 24 r1
152r6 = to_f32 r6
153r6 = mul_f32 r0 r6
154r10 = sub_f32 r2 r5
155r7 = mul_f32 r7 r10
156r8 = mul_f32 r8 r10
157r9 = mul_f32 r9 r10
Mike Klein0c334662019-06-23 15:52:37 -0400158r5 = mad_f32 r6 r10 r5
Mike Kleina6307322019-06-07 15:44:26 -0500159r7 = mad_f32 r7 r3 r4
Mike Klein267f5072019-06-03 16:27:46 -0500160r7 = to_i32 r7
Mike Kleina6307322019-06-07 15:44:26 -0500161r8 = mad_f32 r8 r3 r4
Mike Klein267f5072019-06-03 16:27:46 -0500162r8 = to_i32 r8
Mike Kleina6307322019-06-07 15:44:26 -0500163r9 = mad_f32 r9 r3 r4
Mike Klein754bad32019-06-05 10:47:46 -0500164r9 = to_i32 r9
Mike Klein0c334662019-06-23 15:52:37 -0400165r5 = mad_f32 r5 r3 r4
166r5 = to_i32 r5
Mike Kleina6307322019-06-07 15:44:26 -0500167r8 = pack r7 r8 8
Mike Klein0c334662019-06-23 15:52:37 -0400168r5 = pack r9 r5 8
169r5 = pack r8 r5 16
170store32 arg(1) r5
Mike Klein754bad32019-06-05 10:47:46 -0500171
172G8 over A8
Mike Klein22ea7e92019-06-10 12:05:48 -050017317 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500174☠️ v0 = load8 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500175 v1 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500176☠️ v2 = to_f32 v0
177☠️ v3 = mul_f32 v1 v2
Mike Klein5e533c92019-07-22 13:44:54 -0500178 v4 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500179☠️ v5 = splat 0 (0)
180 v6 = load8 arg(1)
181 v7 = to_f32 v6
182 v8 = mul_f32 v1 v7
Mike Klein5e533c92019-07-22 13:44:54 -0500183 v9 = sub_f32 v4 v4
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500184☠️ v10 = mad_f32 v5 v9 v3
185 v11 = mad_f32 v8 v9 v4
Mike Klein5e533c92019-07-22 13:44:54 -0500186 v12 = splat 437F0000 (255)
187 v13 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500188 v14 = mad_f32 v11 v12 v13
189 v15 = to_i32 v14
190 store8 arg(1) v15
Mike Klein22ea7e92019-06-10 12:05:48 -0500191
Mike Klein754bad32019-06-05 10:47:46 -05001926 registers, 12 instructions:
193r0 = splat 3B808081 (0.0039215689)
194r1 = splat 3F800000 (1)
195r2 = sub_f32 r1 r1
196r3 = splat 437F0000 (255)
197r4 = splat 3F000000 (0.5)
198loop:
199r5 = load8 arg(1)
200r5 = to_f32 r5
201r5 = mul_f32 r0 r5
202r5 = mad_f32 r5 r2 r1
203r5 = mad_f32 r5 r3 r4
204r5 = to_i32 r5
205store8 arg(1) r5
206
207G8 over G8
Mike Klein22ea7e92019-06-10 12:05:48 -050020822 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500209 v0 = load8 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500210 v1 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500211 v2 = to_f32 v0
212 v3 = mul_f32 v1 v2
Mike Klein5e533c92019-07-22 13:44:54 -0500213 v4 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500214 v5 = load8 arg(1)
215 v6 = to_f32 v5
216 v7 = mul_f32 v1 v6
Mike Klein5e533c92019-07-22 13:44:54 -0500217 v8 = sub_f32 v4 v4
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500218 v9 = mad_f32 v7 v8 v3
219☠️ v10 = mad_f32 v4 v8 v4
Mike Klein5e533c92019-07-22 13:44:54 -0500220 v11 = splat 3E59B3D0 (0.21259999)
221 v12 = splat 3F371759 (0.71520001)
222 v13 = splat 3D93DD98 (0.0722)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500223 v14 = mul_f32 v9 v13
224 v15 = mad_f32 v9 v12 v14
225 v16 = mad_f32 v9 v11 v15
Mike Klein5e533c92019-07-22 13:44:54 -0500226 v17 = splat 437F0000 (255)
227 v18 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500228 v19 = mad_f32 v16 v17 v18
229 v20 = to_i32 v19
230 store8 arg(1) v20
Mike Klein22ea7e92019-06-10 12:05:48 -0500231
Mike Kleinf9963112019-08-08 15:13:25 -04002329 registers, 21 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500233r0 = splat 3B808081 (0.0039215689)
234r1 = splat 3F800000 (1)
Mike Kleinf9963112019-08-08 15:13:25 -0400235r1 = sub_f32 r1 r1
236r2 = splat 3E59B3D0 (0.21259999)
237r3 = splat 3F371759 (0.71520001)
238r4 = splat 3D93DD98 (0.0722)
239r5 = splat 437F0000 (255)
240r6 = splat 3F000000 (0.5)
Mike Klein754bad32019-06-05 10:47:46 -0500241loop:
Mike Kleinf9963112019-08-08 15:13:25 -0400242r7 = load8 arg(0)
243r7 = to_f32 r7
244r7 = mul_f32 r0 r7
245r8 = load8 arg(1)
Mike Klein754bad32019-06-05 10:47:46 -0500246r8 = to_f32 r8
247r8 = mul_f32 r0 r8
Mike Kleinf9963112019-08-08 15:13:25 -0400248r7 = mad_f32 r8 r1 r7
249r8 = mul_f32 r7 r4
250r8 = mad_f32 r7 r3 r8
251r8 = mad_f32 r7 r2 r8
252r8 = mad_f32 r8 r5 r6
253r8 = to_i32 r8
254store8 arg(1) r8
Mike Klein754bad32019-06-05 10:47:46 -0500255
256G8 over RGBA_8888
Mike Klein22ea7e92019-06-10 12:05:48 -050025738 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500258 v0 = load8 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500259 v1 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500260 v2 = to_f32 v0
261 v3 = mul_f32 v1 v2
Mike Klein5e533c92019-07-22 13:44:54 -0500262 v4 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500263 v5 = load32 arg(1)
Mike Klein5e533c92019-07-22 13:44:54 -0500264 v6 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500265 v7 = extract v5 0 v6
266 v8 = to_f32 v7
267 v9 = mul_f32 v1 v8
268 v10 = extract v5 8 v6
269 v11 = to_f32 v10
270 v12 = mul_f32 v1 v11
271 v13 = extract v5 16 v6
272 v14 = to_f32 v13
273 v15 = mul_f32 v1 v14
274 v16 = extract v5 24 v6
275 v17 = to_f32 v16
276 v18 = mul_f32 v1 v17
Mike Klein5e533c92019-07-22 13:44:54 -0500277 v19 = sub_f32 v4 v4
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500278 v20 = mad_f32 v9 v19 v3
279 v21 = mad_f32 v12 v19 v3
280 v22 = mad_f32 v15 v19 v3
281 v23 = mad_f32 v18 v19 v4
Mike Klein5e533c92019-07-22 13:44:54 -0500282 v24 = splat 437F0000 (255)
283 v25 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500284 v26 = mad_f32 v20 v24 v25
285 v27 = to_i32 v26
286 v28 = mad_f32 v21 v24 v25
287 v29 = to_i32 v28
288 v30 = mad_f32 v22 v24 v25
289 v31 = to_i32 v30
290 v32 = mad_f32 v23 v24 v25
291 v33 = to_i32 v32
292 v34 = pack v27 v29 8
293 v35 = pack v31 v33 8
294 v36 = pack v34 v35 16
295 store32 arg(1) v36
Mike Klein22ea7e92019-06-10 12:05:48 -0500296
Mike Kleina6307322019-06-07 15:44:26 -050029711 registers, 38 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500298r0 = splat 3B808081 (0.0039215689)
299r1 = splat 3F800000 (1)
Mike Kleina6307322019-06-07 15:44:26 -0500300r2 = splat FF (3.5733111e-43)
301r3 = sub_f32 r1 r1
302r4 = splat 437F0000 (255)
303r5 = splat 3F000000 (0.5)
304loop:
305r6 = load8 arg(0)
306r6 = to_f32 r6
307r6 = mul_f32 r0 r6
308r7 = load32 arg(1)
309r8 = extract r7 0 r2
310r8 = to_f32 r8
311r8 = mul_f32 r0 r8
312r9 = extract r7 8 r2
313r9 = to_f32 r9
314r9 = mul_f32 r0 r9
315r10 = extract r7 16 r2
316r10 = to_f32 r10
317r10 = mul_f32 r0 r10
318r7 = extract r7 24 r2
319r7 = to_f32 r7
320r7 = mul_f32 r0 r7
321r8 = mad_f32 r8 r3 r6
322r9 = mad_f32 r9 r3 r6
Mike Klein0c334662019-06-23 15:52:37 -0400323r6 = mad_f32 r10 r3 r6
Mike Kleina6307322019-06-07 15:44:26 -0500324r7 = mad_f32 r7 r3 r1
325r8 = mad_f32 r8 r4 r5
326r8 = to_i32 r8
327r9 = mad_f32 r9 r4 r5
328r9 = to_i32 r9
Mike Klein0c334662019-06-23 15:52:37 -0400329r6 = mad_f32 r6 r4 r5
330r6 = to_i32 r6
Mike Kleina6307322019-06-07 15:44:26 -0500331r7 = mad_f32 r7 r4 r5
332r7 = to_i32 r7
333r9 = pack r8 r9 8
Mike Klein0c334662019-06-23 15:52:37 -0400334r7 = pack r6 r7 8
Mike Kleina6307322019-06-07 15:44:26 -0500335r7 = pack r9 r7 16
336store32 arg(1) r7
337
338RGBA_8888 over A8
Mike Klein22ea7e92019-06-10 12:05:48 -050033930 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500340 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500341 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500342☠️ v2 = extract v0 0 v1
Mike Klein5e533c92019-07-22 13:44:54 -0500343 v3 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500344☠️ v4 = to_f32 v2
345☠️ v5 = mul_f32 v3 v4
346☠️ v6 = extract v0 8 v1
347☠️ v7 = to_f32 v6
348☠️ v8 = mul_f32 v3 v7
349☠️ v9 = extract v0 16 v1
350☠️ v10 = to_f32 v9
351☠️ v11 = mul_f32 v3 v10
352 v12 = extract v0 24 v1
353 v13 = to_f32 v12
354 v14 = mul_f32 v3 v13
355☠️ v15 = splat 0 (0)
356 v16 = load8 arg(1)
357 v17 = to_f32 v16
358 v18 = mul_f32 v3 v17
Mike Klein5e533c92019-07-22 13:44:54 -0500359 v19 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500360 v20 = sub_f32 v19 v14
361☠️ v21 = mad_f32 v15 v20 v5
362☠️ v22 = mad_f32 v15 v20 v8
363☠️ v23 = mad_f32 v15 v20 v11
364 v24 = mad_f32 v18 v20 v14
Mike Klein5e533c92019-07-22 13:44:54 -0500365 v25 = splat 437F0000 (255)
366 v26 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500367 v27 = mad_f32 v24 v25 v26
368 v28 = to_i32 v27
369 store8 arg(1) v28
Mike Klein22ea7e92019-06-10 12:05:48 -0500370
Mike Kleina6307322019-06-07 15:44:26 -05003718 registers, 17 instructions:
372r0 = splat FF (3.5733111e-43)
373r1 = splat 3B808081 (0.0039215689)
374r2 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -0500375r3 = splat 437F0000 (255)
376r4 = splat 3F000000 (0.5)
377loop:
Mike Kleina6307322019-06-07 15:44:26 -0500378r5 = load32 arg(0)
379r5 = extract r5 24 r0
Mike Klein754bad32019-06-05 10:47:46 -0500380r5 = to_f32 r5
Mike Kleina6307322019-06-07 15:44:26 -0500381r5 = mul_f32 r1 r5
382r6 = load8 arg(1)
Mike Klein754bad32019-06-05 10:47:46 -0500383r6 = to_f32 r6
Mike Kleina6307322019-06-07 15:44:26 -0500384r6 = mul_f32 r1 r6
385r7 = sub_f32 r2 r5
Mike Klein0c334662019-06-23 15:52:37 -0400386r5 = mad_f32 r6 r7 r5
387r5 = mad_f32 r5 r3 r4
388r5 = to_i32 r5
389store8 arg(1) r5
Mike Klein754bad32019-06-05 10:47:46 -0500390
Mike Kleina6307322019-06-07 15:44:26 -0500391RGBA_8888 over G8
Mike Klein22ea7e92019-06-10 12:05:48 -050039235 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500393 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500394 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500395 v2 = extract v0 0 v1
Mike Klein5e533c92019-07-22 13:44:54 -0500396 v3 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500397 v4 = to_f32 v2
398 v5 = mul_f32 v3 v4
399 v6 = extract v0 8 v1
400 v7 = to_f32 v6
401 v8 = mul_f32 v3 v7
402 v9 = extract v0 16 v1
403 v10 = to_f32 v9
404 v11 = mul_f32 v3 v10
405 v12 = extract v0 24 v1
406 v13 = to_f32 v12
407 v14 = mul_f32 v3 v13
408 v15 = load8 arg(1)
409 v16 = to_f32 v15
410 v17 = mul_f32 v3 v16
Mike Klein5e533c92019-07-22 13:44:54 -0500411 v18 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500412 v19 = sub_f32 v18 v14
413 v20 = mad_f32 v17 v19 v5
414 v21 = mad_f32 v17 v19 v8
415 v22 = mad_f32 v17 v19 v11
416☠️ v23 = mad_f32 v18 v19 v14
Mike Klein5e533c92019-07-22 13:44:54 -0500417 v24 = splat 3E59B3D0 (0.21259999)
418 v25 = splat 3F371759 (0.71520001)
419 v26 = splat 3D93DD98 (0.0722)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500420 v27 = mul_f32 v22 v26
421 v28 = mad_f32 v21 v25 v27
422 v29 = mad_f32 v20 v24 v28
Mike Klein5e533c92019-07-22 13:44:54 -0500423 v30 = splat 437F0000 (255)
424 v31 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500425 v32 = mad_f32 v29 v30 v31
426 v33 = to_i32 v32
427 store8 arg(1) v33
Mike Klein22ea7e92019-06-10 12:05:48 -0500428
Mike Kleina6307322019-06-07 15:44:26 -050042913 registers, 34 instructions:
430r0 = splat FF (3.5733111e-43)
431r1 = splat 3B808081 (0.0039215689)
432r2 = splat 3F800000 (1)
433r3 = splat 3E59B3D0 (0.21259999)
434r4 = splat 3F371759 (0.71520001)
435r5 = splat 3D93DD98 (0.0722)
436r6 = splat 437F0000 (255)
437r7 = splat 3F000000 (0.5)
Mike Klein754bad32019-06-05 10:47:46 -0500438loop:
Mike Kleina6307322019-06-07 15:44:26 -0500439r8 = load32 arg(0)
440r9 = extract r8 0 r0
Mike Klein754bad32019-06-05 10:47:46 -0500441r9 = to_f32 r9
Mike Kleina6307322019-06-07 15:44:26 -0500442r9 = mul_f32 r1 r9
443r10 = extract r8 8 r0
Mike Klein754bad32019-06-05 10:47:46 -0500444r10 = to_f32 r10
Mike Kleina6307322019-06-07 15:44:26 -0500445r10 = mul_f32 r1 r10
446r11 = extract r8 16 r0
Mike Klein754bad32019-06-05 10:47:46 -0500447r11 = to_f32 r11
Mike Kleina6307322019-06-07 15:44:26 -0500448r11 = mul_f32 r1 r11
449r8 = extract r8 24 r0
Mike Klein754bad32019-06-05 10:47:46 -0500450r8 = to_f32 r8
Mike Kleina6307322019-06-07 15:44:26 -0500451r8 = mul_f32 r1 r8
452r12 = load8 arg(1)
453r12 = to_f32 r12
454r12 = mul_f32 r1 r12
455r8 = sub_f32 r2 r8
456r9 = mad_f32 r12 r8 r9
457r10 = mad_f32 r12 r8 r10
Mike Klein0c334662019-06-23 15:52:37 -0400458r11 = mad_f32 r12 r8 r11
459r11 = mul_f32 r11 r5
460r11 = mad_f32 r10 r4 r11
461r11 = mad_f32 r9 r3 r11
462r11 = mad_f32 r11 r6 r7
463r11 = to_i32 r11
464store8 arg(1) r11
Mike Kleina6307322019-06-07 15:44:26 -0500465
466RGBA_8888 over RGBA_8888
Mike Klein22ea7e92019-06-10 12:05:48 -050046748 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500468 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500469 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500470 v2 = extract v0 0 v1
Mike Klein5e533c92019-07-22 13:44:54 -0500471 v3 = splat 3B808081 (0.0039215689)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500472 v4 = to_f32 v2
473 v5 = mul_f32 v3 v4
474 v6 = extract v0 8 v1
475 v7 = to_f32 v6
476 v8 = mul_f32 v3 v7
477 v9 = extract v0 16 v1
478 v10 = to_f32 v9
479 v11 = mul_f32 v3 v10
480 v12 = extract v0 24 v1
481 v13 = to_f32 v12
482 v14 = mul_f32 v3 v13
483 v15 = load32 arg(1)
484 v16 = extract v15 0 v1
485 v17 = to_f32 v16
486 v18 = mul_f32 v3 v17
487 v19 = extract v15 8 v1
488 v20 = to_f32 v19
489 v21 = mul_f32 v3 v20
490 v22 = extract v15 16 v1
491 v23 = to_f32 v22
492 v24 = mul_f32 v3 v23
493 v25 = extract v15 24 v1
494 v26 = to_f32 v25
495 v27 = mul_f32 v3 v26
Mike Klein5e533c92019-07-22 13:44:54 -0500496 v28 = splat 3F800000 (1)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500497 v29 = sub_f32 v28 v14
498 v30 = mad_f32 v18 v29 v5
499 v31 = mad_f32 v21 v29 v8
500 v32 = mad_f32 v24 v29 v11
501 v33 = mad_f32 v27 v29 v14
Mike Klein5e533c92019-07-22 13:44:54 -0500502 v34 = splat 437F0000 (255)
503 v35 = splat 3F000000 (0.5)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500504 v36 = mad_f32 v30 v34 v35
505 v37 = to_i32 v36
506 v38 = mad_f32 v31 v34 v35
507 v39 = to_i32 v38
508 v40 = mad_f32 v32 v34 v35
509 v41 = to_i32 v40
510 v42 = mad_f32 v33 v34 v35
511 v43 = to_i32 v42
512 v44 = pack v37 v39 8
513 v45 = pack v41 v43 8
514 v46 = pack v44 v45 16
515 store32 arg(1) v46
Mike Klein22ea7e92019-06-10 12:05:48 -0500516
Mike Kleina6307322019-06-07 15:44:26 -050051714 registers, 48 instructions:
518r0 = splat FF (3.5733111e-43)
519r1 = splat 3B808081 (0.0039215689)
520r2 = splat 3F800000 (1)
521r3 = splat 437F0000 (255)
522r4 = splat 3F000000 (0.5)
523loop:
524r5 = load32 arg(0)
525r6 = extract r5 0 r0
526r6 = to_f32 r6
527r6 = mul_f32 r1 r6
528r7 = extract r5 8 r0
529r7 = to_f32 r7
530r7 = mul_f32 r1 r7
531r8 = extract r5 16 r0
532r8 = to_f32 r8
533r8 = mul_f32 r1 r8
534r5 = extract r5 24 r0
535r5 = to_f32 r5
536r5 = mul_f32 r1 r5
537r9 = load32 arg(1)
538r10 = extract r9 0 r0
539r10 = to_f32 r10
540r10 = mul_f32 r1 r10
541r11 = extract r9 8 r0
542r11 = to_f32 r11
543r11 = mul_f32 r1 r11
544r12 = extract r9 16 r0
545r12 = to_f32 r12
546r12 = mul_f32 r1 r12
547r9 = extract r9 24 r0
548r9 = to_f32 r9
549r9 = mul_f32 r1 r9
550r13 = sub_f32 r2 r5
Mike Klein0c334662019-06-23 15:52:37 -0400551r6 = mad_f32 r10 r13 r6
552r7 = mad_f32 r11 r13 r7
553r8 = mad_f32 r12 r13 r8
554r5 = mad_f32 r9 r13 r5
555r6 = mad_f32 r6 r3 r4
556r6 = to_i32 r6
557r7 = mad_f32 r7 r3 r4
558r7 = to_i32 r7
559r8 = mad_f32 r8 r3 r4
560r8 = to_i32 r8
561r5 = mad_f32 r5 r3 r4
562r5 = to_i32 r5
563r7 = pack r6 r7 8
564r5 = pack r8 r5 8
565r5 = pack r7 r5 16
566store32 arg(1) r5
Mike Klein267f5072019-06-03 16:27:46 -0500567
Mike Klein397fc882019-06-20 11:37:10 -0500568I32 (Naive) 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050056929 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500570 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500571 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500572 v2 = extract v0 0 v1
573 v3 = extract v0 8 v1
574 v4 = extract v0 16 v1
575 v5 = extract v0 24 v1
576 v6 = load32 arg(1)
577 v7 = extract v6 0 v1
578 v8 = extract v6 8 v1
579 v9 = extract v6 16 v1
580 v10 = extract v6 24 v1
Mike Klein5e533c92019-07-22 13:44:54 -0500581 v11 = splat 100 (3.5873241e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500582 v12 = sub_i32 v11 v5
583 v13 = mul_i32 v7 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500584 v14 = shr_i32 v13 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500585 v15 = add_i32 v2 v14
586 v16 = mul_i32 v8 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500587 v17 = shr_i32 v16 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500588 v18 = add_i32 v3 v17
589 v19 = mul_i32 v9 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500590 v20 = shr_i32 v19 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500591 v21 = add_i32 v4 v20
592 v22 = mul_i32 v10 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500593 v23 = shr_i32 v22 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500594 v24 = add_i32 v5 v23
595 v25 = pack v15 v18 8
596 v26 = pack v21 v24 8
597 v27 = pack v25 v26 16
598 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500599
Mike Klein397fc882019-06-20 11:37:10 -050060011 registers, 29 instructions:
601r0 = splat FF (3.5733111e-43)
602r1 = splat 100 (3.5873241e-43)
603loop:
604r2 = load32 arg(0)
605r3 = extract r2 0 r0
606r4 = extract r2 8 r0
607r5 = extract r2 16 r0
608r2 = extract r2 24 r0
609r6 = load32 arg(1)
610r7 = extract r6 0 r0
611r8 = extract r6 8 r0
612r9 = extract r6 16 r0
613r6 = extract r6 24 r0
614r10 = sub_i32 r1 r2
615r7 = mul_i32 r7 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500616r7 = shr_i32 r7 8
Mike Klein397fc882019-06-20 11:37:10 -0500617r7 = add_i32 r3 r7
618r8 = mul_i32 r8 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500619r8 = shr_i32 r8 8
Mike Klein397fc882019-06-20 11:37:10 -0500620r8 = add_i32 r4 r8
621r9 = mul_i32 r9 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500622r9 = shr_i32 r9 8
Mike Klein397fc882019-06-20 11:37:10 -0500623r9 = add_i32 r5 r9
624r10 = mul_i32 r6 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500625r10 = shr_i32 r10 8
Mike Klein397fc882019-06-20 11:37:10 -0500626r10 = add_i32 r2 r10
627r8 = pack r7 r8 8
628r10 = pack r9 r10 8
629r10 = pack r8 r10 16
630store32 arg(1) r10
631
Mike Klein7b7077c2019-06-03 17:10:59 -0500632I32 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050063329 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500634 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500635 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500636 v2 = bit_and v0 v1
637 v3 = bytes v0 2
638 v4 = bytes v0 3
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500639 v5 = shr_i32 v0 24
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500640 v6 = load32 arg(1)
641 v7 = bit_and v6 v1
642 v8 = bytes v6 2
643 v9 = bytes v6 3
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500644 v10 = shr_i32 v6 24
Mike Klein5e533c92019-07-22 13:44:54 -0500645 v11 = splat 100 (3.5873241e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500646 v12 = sub_i32 v11 v5
647 v13 = mul_i16x2 v7 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500648 v14 = shr_i32 v13 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500649 v15 = add_i32 v2 v14
650 v16 = mul_i16x2 v8 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500651 v17 = shr_i32 v16 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500652 v18 = add_i32 v3 v17
653 v19 = mul_i16x2 v9 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500654 v20 = shr_i32 v19 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500655 v21 = add_i32 v4 v20
656 v22 = mul_i16x2 v10 v12
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500657 v23 = shr_i32 v22 8
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500658 v24 = add_i32 v5 v23
659 v25 = pack v15 v18 8
660 v26 = pack v21 v24 8
661 v27 = pack v25 v26 16
662 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500663
Mike Klein821f5e82019-06-13 10:56:51 -050066411 registers, 29 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500665r0 = splat FF (3.5733111e-43)
Mike Klein821f5e82019-06-13 10:56:51 -0500666r1 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500667loop:
668r2 = load32 arg(0)
Mike Klein342b1b22019-06-13 16:43:18 -0500669r3 = bit_and r2 r0
670r4 = bytes r2 2
671r5 = bytes r2 3
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500672r2 = shr_i32 r2 24
Mike Klein821f5e82019-06-13 10:56:51 -0500673r6 = load32 arg(1)
Mike Klein342b1b22019-06-13 16:43:18 -0500674r7 = bit_and r6 r0
675r8 = bytes r6 2
676r9 = bytes r6 3
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500677r6 = shr_i32 r6 24
Mike Klein821f5e82019-06-13 10:56:51 -0500678r10 = sub_i32 r1 r2
Mike Klein35389082019-06-13 11:29:26 -0500679r7 = mul_i16x2 r7 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500680r7 = shr_i32 r7 8
Mike Klein3f593792019-06-12 12:54:52 -0500681r7 = add_i32 r3 r7
Mike Klein35389082019-06-13 11:29:26 -0500682r8 = mul_i16x2 r8 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500683r8 = shr_i32 r8 8
Mike Klein821f5e82019-06-13 10:56:51 -0500684r8 = add_i32 r4 r8
Mike Klein35389082019-06-13 11:29:26 -0500685r9 = mul_i16x2 r9 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500686r9 = shr_i32 r9 8
Mike Klein821f5e82019-06-13 10:56:51 -0500687r9 = add_i32 r5 r9
Mike Klein35389082019-06-13 11:29:26 -0500688r10 = mul_i16x2 r6 r10
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500689r10 = shr_i32 r10 8
Mike Klein821f5e82019-06-13 10:56:51 -0500690r10 = add_i32 r2 r10
691r8 = pack r7 r8 8
692r10 = pack r9 r10 8
693r10 = pack r8 r10 16
694store32 arg(1) r10
695
696I32 (SWAR) 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050069715 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500698 v0 = load32 arg(0)
699 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500700 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500701 v3 = sub_i16x2 v2 v1
702 v4 = load32 arg(1)
Mike Klein5e533c92019-07-22 13:44:54 -0500703 v5 = splat FF00FF (2.3418409e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500704 v6 = bit_and v4 v5
705 v7 = shr_i16x2 v4 8
706 v8 = mul_i16x2 v6 v3
707 v9 = shr_i16x2 v8 8
708 v10 = mul_i16x2 v7 v3
709 v11 = bit_clear v10 v5
710 v12 = bit_or v9 v11
711 v13 = add_i32 v0 v12
712 store32 arg(1) v13
Mike Kleinaab45b52019-07-02 15:39:23 -0500713
Mike Klein2b7b2a22019-06-23 20:35:28 -04007146 registers, 15 instructions:
Mike Klein7f061fb2019-06-13 13:12:38 -0500715r0 = splat 1000100 (2.3510604e-38)
716r1 = splat FF00FF (2.3418409e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500717loop:
Mike Klein2b7b2a22019-06-23 20:35:28 -0400718r2 = load32 arg(0)
719r3 = bytes r2 404
720r3 = sub_i16x2 r0 r3
721r4 = load32 arg(1)
722r5 = bit_and r4 r1
723r4 = shr_i16x2 r4 8
724r5 = mul_i16x2 r5 r3
Mike Klein4c4945a2019-06-13 15:51:39 -0500725r5 = shr_i16x2 r5 8
Mike Klein2b7b2a22019-06-23 20:35:28 -0400726r3 = mul_i16x2 r4 r3
727r3 = bit_clear r3 r1
728r3 = bit_or r5 r3
729r3 = add_i32 r2 r3
730store32 arg(1) r3
Mike Klein7b7077c2019-06-03 17:10:59 -0500731
Mike Kleinf9963112019-08-08 15:13:25 -04007326 values:
733 v0 = splat 1 (1.4012985e-45)
734 v1 = splat 2 (2.8025969e-45)
735 v2 = add_i32 v0 v1
736 v3 = load32 arg(0)
737 v4 = mul_i32 v3 v2
738 store32 arg(0) v4
739
7402 registers, 6 instructions:
741r0 = splat 1 (1.4012985e-45)
742r1 = splat 2 (2.8025969e-45)
743r1 = add_i32 r0 r1
744loop:
745r0 = load32 arg(0)
746r0 = mul_i32 r0 r1
747store32 arg(0) r0
748