blob: 7e6849361591cbb453122816428221c3f49a4492 [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein22ea7e92019-06-10 12:05:48 -0500217 values:
Mike Kleina1167ab2019-06-23 17:06:37 -04003v0 = splat 0 (0)
4v1 = load8 arg(0)
5v2 = splat 3B808081 (0.0039215689)
6v3 = to_f32 v1
7v4 = mul_f32 v2 v3
8v5 = load8 arg(1)
9v6 = to_f32 v5
10v7 = mul_f32 v2 v6
11v8 = splat 3F800000 (1)
12v9 = sub_f32 v8 v4
13v10 = mul_f32 v0 v9
14v11 = mad_f32 v7 v9 v4
15v12 = splat 437F0000 (255)
16v13 = splat 3F000000 (0.5)
17v14 = mad_f32 v11 v12 v13
18v15 = to_i32 v14
19store8 arg(1) v15
Mike Klein22ea7e92019-06-10 12:05:48 -050020
Mike Klein754bad32019-06-05 10:47:46 -0500217 registers, 15 instructions:
Mike Kleinc2d9a312019-06-05 14:12:16 +000022r0 = splat 3B808081 (0.0039215689)
Mike Klein267f5072019-06-03 16:27:46 -050023r1 = splat 3F800000 (1)
Mike Klein267f5072019-06-03 16:27:46 -050024r2 = splat 437F0000 (255)
Mike Klein754bad32019-06-05 10:47:46 -050025r3 = splat 3F000000 (0.5)
26loop:
27r4 = load8 arg(0)
Mike Klein9656dce2019-06-04 11:33:25 -050028r4 = to_f32 r4
Mike Klein754bad32019-06-05 10:47:46 -050029r4 = mul_f32 r0 r4
Mike Klein9656dce2019-06-04 11:33:25 -050030r5 = load8 arg(1)
31r5 = to_f32 r5
Mike Klein754bad32019-06-05 10:47:46 -050032r5 = mul_f32 r0 r5
33r6 = sub_f32 r1 r4
Mike Klein0c334662019-06-23 15:52:37 -040034r4 = mad_f32 r5 r6 r4
35r4 = mad_f32 r4 r2 r3
36r4 = to_i32 r4
37store8 arg(1) r4
Mike Klein267f5072019-06-03 16:27:46 -050038
Mike Klein754bad32019-06-05 10:47:46 -050039A8 over G8
Mike Klein22ea7e92019-06-10 12:05:48 -05004023 values:
Mike Kleina1167ab2019-06-23 17:06:37 -040041v0 = splat 0 (0)
42v1 = load8 arg(0)
43v2 = splat 3B808081 (0.0039215689)
44v3 = to_f32 v1
45v4 = mul_f32 v2 v3
46v5 = load8 arg(1)
47v6 = to_f32 v5
48v7 = mul_f32 v2 v6
49v8 = splat 3F800000 (1)
50v9 = sub_f32 v8 v4
51v10 = mul_f32 v7 v9
52v11 = mad_f32 v8 v9 v4
53v12 = splat 3E59B3D0 (0.21259999)
54v13 = splat 3F371759 (0.71520001)
55v14 = splat 3D93DD98 (0.0722)
56v15 = mul_f32 v10 v14
57v16 = mad_f32 v10 v13 v15
58v17 = mad_f32 v10 v12 v16
59v18 = splat 437F0000 (255)
60v19 = splat 3F000000 (0.5)
61v20 = mad_f32 v17 v18 v19
62v21 = to_i32 v20
63store8 arg(1) v21
Mike Klein22ea7e92019-06-10 12:05:48 -050064
Mike Klein754bad32019-06-05 10:47:46 -0500659 registers, 21 instructions:
66r0 = splat 3B808081 (0.0039215689)
67r1 = splat 3F800000 (1)
68r2 = splat 3E59B3D0 (0.21259999)
69r3 = splat 3F371759 (0.71520001)
70r4 = splat 3D93DD98 (0.0722)
71r5 = splat 437F0000 (255)
72r6 = splat 3F000000 (0.5)
73loop:
74r7 = load8 arg(0)
75r7 = to_f32 r7
76r7 = mul_f32 r0 r7
77r8 = load8 arg(1)
78r8 = to_f32 r8
79r8 = mul_f32 r0 r8
80r7 = sub_f32 r1 r7
81r7 = mul_f32 r8 r7
82r8 = mul_f32 r7 r4
83r8 = mad_f32 r7 r3 r8
84r8 = mad_f32 r7 r2 r8
85r8 = mad_f32 r8 r5 r6
86r8 = to_i32 r8
87store8 arg(1) r8
88
89A8 over RGBA_8888
Mike Klein22ea7e92019-06-10 12:05:48 -05009039 values:
Mike Kleina1167ab2019-06-23 17:06:37 -040091v0 = splat 0 (0)
92v1 = load8 arg(0)
93v2 = splat 3B808081 (0.0039215689)
94v3 = to_f32 v1
95v4 = mul_f32 v2 v3
96v5 = load32 arg(1)
97v6 = splat FF (3.5733111e-43)
98v7 = extract v5 0 v6
99v8 = to_f32 v7
100v9 = mul_f32 v2 v8
101v10 = extract v5 8 v6
102v11 = to_f32 v10
103v12 = mul_f32 v2 v11
104v13 = extract v5 16 v6
105v14 = to_f32 v13
106v15 = mul_f32 v2 v14
107v16 = extract v5 24 v6
108v17 = to_f32 v16
109v18 = mul_f32 v2 v17
110v19 = splat 3F800000 (1)
111v20 = sub_f32 v19 v4
112v21 = mul_f32 v9 v20
113v22 = mul_f32 v12 v20
114v23 = mul_f32 v15 v20
115v24 = mad_f32 v18 v20 v4
116v25 = splat 437F0000 (255)
117v26 = splat 3F000000 (0.5)
118v27 = mad_f32 v21 v25 v26
119v28 = to_i32 v27
120v29 = mad_f32 v22 v25 v26
121v30 = to_i32 v29
122v31 = mad_f32 v23 v25 v26
123v32 = to_i32 v31
124v33 = mad_f32 v24 v25 v26
125v34 = to_i32 v33
126v35 = pack v28 v30 8
127v36 = pack v32 v34 8
128v37 = pack v35 v36 16
129store32 arg(1) v37
Mike Klein22ea7e92019-06-10 12:05:48 -0500130
Mike Kleina6307322019-06-07 15:44:26 -050013111 registers, 38 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500132r0 = splat 3B808081 (0.0039215689)
Mike Kleina6307322019-06-07 15:44:26 -0500133r1 = splat FF (3.5733111e-43)
134r2 = splat 3F800000 (1)
135r3 = splat 437F0000 (255)
136r4 = splat 3F000000 (0.5)
Mike Klein754bad32019-06-05 10:47:46 -0500137loop:
Mike Kleina6307322019-06-07 15:44:26 -0500138r5 = load8 arg(0)
Mike Klein9656dce2019-06-04 11:33:25 -0500139r5 = to_f32 r5
Mike Klein754bad32019-06-05 10:47:46 -0500140r5 = mul_f32 r0 r5
Mike Kleina6307322019-06-07 15:44:26 -0500141r6 = load32 arg(1)
142r7 = extract r6 0 r1
143r7 = to_f32 r7
144r7 = mul_f32 r0 r7
145r8 = extract r6 8 r1
146r8 = to_f32 r8
147r8 = mul_f32 r0 r8
148r9 = extract r6 16 r1
149r9 = to_f32 r9
150r9 = mul_f32 r0 r9
151r6 = extract r6 24 r1
152r6 = to_f32 r6
153r6 = mul_f32 r0 r6
154r10 = sub_f32 r2 r5
155r7 = mul_f32 r7 r10
156r8 = mul_f32 r8 r10
157r9 = mul_f32 r9 r10
Mike Klein0c334662019-06-23 15:52:37 -0400158r5 = mad_f32 r6 r10 r5
Mike Kleina6307322019-06-07 15:44:26 -0500159r7 = mad_f32 r7 r3 r4
Mike Klein267f5072019-06-03 16:27:46 -0500160r7 = to_i32 r7
Mike Kleina6307322019-06-07 15:44:26 -0500161r8 = mad_f32 r8 r3 r4
Mike Klein267f5072019-06-03 16:27:46 -0500162r8 = to_i32 r8
Mike Kleina6307322019-06-07 15:44:26 -0500163r9 = mad_f32 r9 r3 r4
Mike Klein754bad32019-06-05 10:47:46 -0500164r9 = to_i32 r9
Mike Klein0c334662019-06-23 15:52:37 -0400165r5 = mad_f32 r5 r3 r4
166r5 = to_i32 r5
Mike Kleina6307322019-06-07 15:44:26 -0500167r8 = pack r7 r8 8
Mike Klein0c334662019-06-23 15:52:37 -0400168r5 = pack r9 r5 8
169r5 = pack r8 r5 16
170store32 arg(1) r5
Mike Klein754bad32019-06-05 10:47:46 -0500171
172G8 over A8
Mike Klein22ea7e92019-06-10 12:05:48 -050017317 values:
Mike Kleina1167ab2019-06-23 17:06:37 -0400174v0 = load8 arg(0)
175v1 = splat 3B808081 (0.0039215689)
176v2 = to_f32 v0
177v3 = mul_f32 v1 v2
178v4 = splat 3F800000 (1)
179v5 = splat 0 (0)
180v6 = load8 arg(1)
181v7 = to_f32 v6
182v8 = mul_f32 v1 v7
183v9 = sub_f32 v4 v4
184v10 = mad_f32 v5 v9 v3
185v11 = mad_f32 v8 v9 v4
186v12 = splat 437F0000 (255)
187v13 = splat 3F000000 (0.5)
188v14 = mad_f32 v11 v12 v13
189v15 = to_i32 v14
190store8 arg(1) v15
Mike Klein22ea7e92019-06-10 12:05:48 -0500191
Mike Klein754bad32019-06-05 10:47:46 -05001926 registers, 12 instructions:
193r0 = splat 3B808081 (0.0039215689)
194r1 = splat 3F800000 (1)
195r2 = sub_f32 r1 r1
196r3 = splat 437F0000 (255)
197r4 = splat 3F000000 (0.5)
198loop:
199r5 = load8 arg(1)
200r5 = to_f32 r5
201r5 = mul_f32 r0 r5
202r5 = mad_f32 r5 r2 r1
203r5 = mad_f32 r5 r3 r4
204r5 = to_i32 r5
205store8 arg(1) r5
206
207G8 over G8
Mike Klein22ea7e92019-06-10 12:05:48 -050020822 values:
Mike Kleina1167ab2019-06-23 17:06:37 -0400209v0 = load8 arg(0)
210v1 = splat 3B808081 (0.0039215689)
211v2 = to_f32 v0
212v3 = mul_f32 v1 v2
213v4 = splat 3F800000 (1)
214v5 = load8 arg(1)
215v6 = to_f32 v5
216v7 = mul_f32 v1 v6
217v8 = sub_f32 v4 v4
218v9 = mad_f32 v7 v8 v3
219v10 = mad_f32 v4 v8 v4
220v11 = splat 3E59B3D0 (0.21259999)
221v12 = splat 3F371759 (0.71520001)
222v13 = splat 3D93DD98 (0.0722)
223v14 = mul_f32 v9 v13
224v15 = mad_f32 v9 v12 v14
225v16 = mad_f32 v9 v11 v15
226v17 = splat 437F0000 (255)
227v18 = splat 3F000000 (0.5)
228v19 = mad_f32 v16 v17 v18
229v20 = to_i32 v19
230store8 arg(1) v20
Mike Klein22ea7e92019-06-10 12:05:48 -0500231
Mike Klein754bad32019-06-05 10:47:46 -050023210 registers, 21 instructions:
233r0 = splat 3B808081 (0.0039215689)
234r1 = splat 3F800000 (1)
235r2 = sub_f32 r1 r1
236r3 = splat 3E59B3D0 (0.21259999)
237r4 = splat 3F371759 (0.71520001)
238r5 = splat 3D93DD98 (0.0722)
239r6 = splat 437F0000 (255)
240r7 = splat 3F000000 (0.5)
241loop:
242r8 = load8 arg(0)
243r8 = to_f32 r8
244r8 = mul_f32 r0 r8
245r9 = load8 arg(1)
246r9 = to_f32 r9
247r9 = mul_f32 r0 r9
Mike Klein0c334662019-06-23 15:52:37 -0400248r8 = mad_f32 r9 r2 r8
249r9 = mul_f32 r8 r5
250r9 = mad_f32 r8 r4 r9
251r9 = mad_f32 r8 r3 r9
252r9 = mad_f32 r9 r6 r7
253r9 = to_i32 r9
254store8 arg(1) r9
Mike Klein754bad32019-06-05 10:47:46 -0500255
256G8 over RGBA_8888
Mike Klein22ea7e92019-06-10 12:05:48 -050025738 values:
Mike Kleina1167ab2019-06-23 17:06:37 -0400258v0 = load8 arg(0)
259v1 = splat 3B808081 (0.0039215689)
260v2 = to_f32 v0
261v3 = mul_f32 v1 v2
262v4 = splat 3F800000 (1)
263v5 = load32 arg(1)
264v6 = splat FF (3.5733111e-43)
265v7 = extract v5 0 v6
266v8 = to_f32 v7
267v9 = mul_f32 v1 v8
268v10 = extract v5 8 v6
269v11 = to_f32 v10
270v12 = mul_f32 v1 v11
271v13 = extract v5 16 v6
272v14 = to_f32 v13
273v15 = mul_f32 v1 v14
274v16 = extract v5 24 v6
275v17 = to_f32 v16
276v18 = mul_f32 v1 v17
277v19 = sub_f32 v4 v4
278v20 = mad_f32 v9 v19 v3
279v21 = mad_f32 v12 v19 v3
280v22 = mad_f32 v15 v19 v3
281v23 = mad_f32 v18 v19 v4
282v24 = splat 437F0000 (255)
283v25 = splat 3F000000 (0.5)
284v26 = mad_f32 v20 v24 v25
285v27 = to_i32 v26
286v28 = mad_f32 v21 v24 v25
287v29 = to_i32 v28
288v30 = mad_f32 v22 v24 v25
289v31 = to_i32 v30
290v32 = mad_f32 v23 v24 v25
291v33 = to_i32 v32
292v34 = pack v27 v29 8
293v35 = pack v31 v33 8
294v36 = pack v34 v35 16
295store32 arg(1) v36
Mike Klein22ea7e92019-06-10 12:05:48 -0500296
Mike Kleina6307322019-06-07 15:44:26 -050029711 registers, 38 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500298r0 = splat 3B808081 (0.0039215689)
299r1 = splat 3F800000 (1)
Mike Kleina6307322019-06-07 15:44:26 -0500300r2 = splat FF (3.5733111e-43)
301r3 = sub_f32 r1 r1
302r4 = splat 437F0000 (255)
303r5 = splat 3F000000 (0.5)
304loop:
305r6 = load8 arg(0)
306r6 = to_f32 r6
307r6 = mul_f32 r0 r6
308r7 = load32 arg(1)
309r8 = extract r7 0 r2
310r8 = to_f32 r8
311r8 = mul_f32 r0 r8
312r9 = extract r7 8 r2
313r9 = to_f32 r9
314r9 = mul_f32 r0 r9
315r10 = extract r7 16 r2
316r10 = to_f32 r10
317r10 = mul_f32 r0 r10
318r7 = extract r7 24 r2
319r7 = to_f32 r7
320r7 = mul_f32 r0 r7
321r8 = mad_f32 r8 r3 r6
322r9 = mad_f32 r9 r3 r6
Mike Klein0c334662019-06-23 15:52:37 -0400323r6 = mad_f32 r10 r3 r6
Mike Kleina6307322019-06-07 15:44:26 -0500324r7 = mad_f32 r7 r3 r1
325r8 = mad_f32 r8 r4 r5
326r8 = to_i32 r8
327r9 = mad_f32 r9 r4 r5
328r9 = to_i32 r9
Mike Klein0c334662019-06-23 15:52:37 -0400329r6 = mad_f32 r6 r4 r5
330r6 = to_i32 r6
Mike Kleina6307322019-06-07 15:44:26 -0500331r7 = mad_f32 r7 r4 r5
332r7 = to_i32 r7
333r9 = pack r8 r9 8
Mike Klein0c334662019-06-23 15:52:37 -0400334r7 = pack r6 r7 8
Mike Kleina6307322019-06-07 15:44:26 -0500335r7 = pack r9 r7 16
336store32 arg(1) r7
337
338RGBA_8888 over A8
Mike Klein22ea7e92019-06-10 12:05:48 -050033930 values:
Mike Kleina1167ab2019-06-23 17:06:37 -0400340v0 = load32 arg(0)
341v1 = splat FF (3.5733111e-43)
342v2 = extract v0 0 v1
343v3 = splat 3B808081 (0.0039215689)
344v4 = to_f32 v2
345v5 = mul_f32 v3 v4
346v6 = extract v0 8 v1
347v7 = to_f32 v6
348v8 = mul_f32 v3 v7
349v9 = extract v0 16 v1
350v10 = to_f32 v9
351v11 = mul_f32 v3 v10
352v12 = extract v0 24 v1
353v13 = to_f32 v12
354v14 = mul_f32 v3 v13
355v15 = splat 0 (0)
356v16 = load8 arg(1)
357v17 = to_f32 v16
358v18 = mul_f32 v3 v17
359v19 = splat 3F800000 (1)
360v20 = sub_f32 v19 v14
361v21 = mad_f32 v15 v20 v5
362v22 = mad_f32 v15 v20 v8
363v23 = mad_f32 v15 v20 v11
364v24 = mad_f32 v18 v20 v14
365v25 = splat 437F0000 (255)
366v26 = splat 3F000000 (0.5)
367v27 = mad_f32 v24 v25 v26
368v28 = to_i32 v27
369store8 arg(1) v28
Mike Klein22ea7e92019-06-10 12:05:48 -0500370
Mike Kleina6307322019-06-07 15:44:26 -05003718 registers, 17 instructions:
372r0 = splat FF (3.5733111e-43)
373r1 = splat 3B808081 (0.0039215689)
374r2 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -0500375r3 = splat 437F0000 (255)
376r4 = splat 3F000000 (0.5)
377loop:
Mike Kleina6307322019-06-07 15:44:26 -0500378r5 = load32 arg(0)
379r5 = extract r5 24 r0
Mike Klein754bad32019-06-05 10:47:46 -0500380r5 = to_f32 r5
Mike Kleina6307322019-06-07 15:44:26 -0500381r5 = mul_f32 r1 r5
382r6 = load8 arg(1)
Mike Klein754bad32019-06-05 10:47:46 -0500383r6 = to_f32 r6
Mike Kleina6307322019-06-07 15:44:26 -0500384r6 = mul_f32 r1 r6
385r7 = sub_f32 r2 r5
Mike Klein0c334662019-06-23 15:52:37 -0400386r5 = mad_f32 r6 r7 r5
387r5 = mad_f32 r5 r3 r4
388r5 = to_i32 r5
389store8 arg(1) r5
Mike Klein754bad32019-06-05 10:47:46 -0500390
Mike Kleina6307322019-06-07 15:44:26 -0500391RGBA_8888 over G8
Mike Klein22ea7e92019-06-10 12:05:48 -050039235 values:
Mike Kleina1167ab2019-06-23 17:06:37 -0400393v0 = load32 arg(0)
394v1 = splat FF (3.5733111e-43)
395v2 = extract v0 0 v1
396v3 = splat 3B808081 (0.0039215689)
397v4 = to_f32 v2
398v5 = mul_f32 v3 v4
399v6 = extract v0 8 v1
400v7 = to_f32 v6
401v8 = mul_f32 v3 v7
402v9 = extract v0 16 v1
403v10 = to_f32 v9
404v11 = mul_f32 v3 v10
405v12 = extract v0 24 v1
406v13 = to_f32 v12
407v14 = mul_f32 v3 v13
408v15 = load8 arg(1)
409v16 = to_f32 v15
410v17 = mul_f32 v3 v16
411v18 = splat 3F800000 (1)
412v19 = sub_f32 v18 v14
413v20 = mad_f32 v17 v19 v5
414v21 = mad_f32 v17 v19 v8
415v22 = mad_f32 v17 v19 v11
416v23 = mad_f32 v18 v19 v14
417v24 = splat 3E59B3D0 (0.21259999)
418v25 = splat 3F371759 (0.71520001)
419v26 = splat 3D93DD98 (0.0722)
420v27 = mul_f32 v22 v26
421v28 = mad_f32 v21 v25 v27
422v29 = mad_f32 v20 v24 v28
423v30 = splat 437F0000 (255)
424v31 = splat 3F000000 (0.5)
425v32 = mad_f32 v29 v30 v31
426v33 = to_i32 v32
427store8 arg(1) v33
Mike Klein22ea7e92019-06-10 12:05:48 -0500428
Mike Kleina6307322019-06-07 15:44:26 -050042913 registers, 34 instructions:
430r0 = splat FF (3.5733111e-43)
431r1 = splat 3B808081 (0.0039215689)
432r2 = splat 3F800000 (1)
433r3 = splat 3E59B3D0 (0.21259999)
434r4 = splat 3F371759 (0.71520001)
435r5 = splat 3D93DD98 (0.0722)
436r6 = splat 437F0000 (255)
437r7 = splat 3F000000 (0.5)
Mike Klein754bad32019-06-05 10:47:46 -0500438loop:
Mike Kleina6307322019-06-07 15:44:26 -0500439r8 = load32 arg(0)
440r9 = extract r8 0 r0
Mike Klein754bad32019-06-05 10:47:46 -0500441r9 = to_f32 r9
Mike Kleina6307322019-06-07 15:44:26 -0500442r9 = mul_f32 r1 r9
443r10 = extract r8 8 r0
Mike Klein754bad32019-06-05 10:47:46 -0500444r10 = to_f32 r10
Mike Kleina6307322019-06-07 15:44:26 -0500445r10 = mul_f32 r1 r10
446r11 = extract r8 16 r0
Mike Klein754bad32019-06-05 10:47:46 -0500447r11 = to_f32 r11
Mike Kleina6307322019-06-07 15:44:26 -0500448r11 = mul_f32 r1 r11
449r8 = extract r8 24 r0
Mike Klein754bad32019-06-05 10:47:46 -0500450r8 = to_f32 r8
Mike Kleina6307322019-06-07 15:44:26 -0500451r8 = mul_f32 r1 r8
452r12 = load8 arg(1)
453r12 = to_f32 r12
454r12 = mul_f32 r1 r12
455r8 = sub_f32 r2 r8
456r9 = mad_f32 r12 r8 r9
457r10 = mad_f32 r12 r8 r10
Mike Klein0c334662019-06-23 15:52:37 -0400458r11 = mad_f32 r12 r8 r11
459r11 = mul_f32 r11 r5
460r11 = mad_f32 r10 r4 r11
461r11 = mad_f32 r9 r3 r11
462r11 = mad_f32 r11 r6 r7
463r11 = to_i32 r11
464store8 arg(1) r11
Mike Kleina6307322019-06-07 15:44:26 -0500465
466RGBA_8888 over RGBA_8888
Mike Klein22ea7e92019-06-10 12:05:48 -050046748 values:
Mike Kleina1167ab2019-06-23 17:06:37 -0400468v0 = load32 arg(0)
469v1 = splat FF (3.5733111e-43)
470v2 = extract v0 0 v1
471v3 = splat 3B808081 (0.0039215689)
472v4 = to_f32 v2
473v5 = mul_f32 v3 v4
474v6 = extract v0 8 v1
475v7 = to_f32 v6
476v8 = mul_f32 v3 v7
477v9 = extract v0 16 v1
478v10 = to_f32 v9
479v11 = mul_f32 v3 v10
480v12 = extract v0 24 v1
481v13 = to_f32 v12
482v14 = mul_f32 v3 v13
483v15 = load32 arg(1)
484v16 = extract v15 0 v1
485v17 = to_f32 v16
486v18 = mul_f32 v3 v17
487v19 = extract v15 8 v1
488v20 = to_f32 v19
489v21 = mul_f32 v3 v20
490v22 = extract v15 16 v1
491v23 = to_f32 v22
492v24 = mul_f32 v3 v23
493v25 = extract v15 24 v1
494v26 = to_f32 v25
495v27 = mul_f32 v3 v26
496v28 = splat 3F800000 (1)
497v29 = sub_f32 v28 v14
498v30 = mad_f32 v18 v29 v5
499v31 = mad_f32 v21 v29 v8
500v32 = mad_f32 v24 v29 v11
501v33 = mad_f32 v27 v29 v14
502v34 = splat 437F0000 (255)
503v35 = splat 3F000000 (0.5)
504v36 = mad_f32 v30 v34 v35
505v37 = to_i32 v36
506v38 = mad_f32 v31 v34 v35
507v39 = to_i32 v38
508v40 = mad_f32 v32 v34 v35
509v41 = to_i32 v40
510v42 = mad_f32 v33 v34 v35
511v43 = to_i32 v42
512v44 = pack v37 v39 8
513v45 = pack v41 v43 8
514v46 = pack v44 v45 16
515store32 arg(1) v46
Mike Klein22ea7e92019-06-10 12:05:48 -0500516
Mike Kleina6307322019-06-07 15:44:26 -050051714 registers, 48 instructions:
518r0 = splat FF (3.5733111e-43)
519r1 = splat 3B808081 (0.0039215689)
520r2 = splat 3F800000 (1)
521r3 = splat 437F0000 (255)
522r4 = splat 3F000000 (0.5)
523loop:
524r5 = load32 arg(0)
525r6 = extract r5 0 r0
526r6 = to_f32 r6
527r6 = mul_f32 r1 r6
528r7 = extract r5 8 r0
529r7 = to_f32 r7
530r7 = mul_f32 r1 r7
531r8 = extract r5 16 r0
532r8 = to_f32 r8
533r8 = mul_f32 r1 r8
534r5 = extract r5 24 r0
535r5 = to_f32 r5
536r5 = mul_f32 r1 r5
537r9 = load32 arg(1)
538r10 = extract r9 0 r0
539r10 = to_f32 r10
540r10 = mul_f32 r1 r10
541r11 = extract r9 8 r0
542r11 = to_f32 r11
543r11 = mul_f32 r1 r11
544r12 = extract r9 16 r0
545r12 = to_f32 r12
546r12 = mul_f32 r1 r12
547r9 = extract r9 24 r0
548r9 = to_f32 r9
549r9 = mul_f32 r1 r9
550r13 = sub_f32 r2 r5
Mike Klein0c334662019-06-23 15:52:37 -0400551r6 = mad_f32 r10 r13 r6
552r7 = mad_f32 r11 r13 r7
553r8 = mad_f32 r12 r13 r8
554r5 = mad_f32 r9 r13 r5
555r6 = mad_f32 r6 r3 r4
556r6 = to_i32 r6
557r7 = mad_f32 r7 r3 r4
558r7 = to_i32 r7
559r8 = mad_f32 r8 r3 r4
560r8 = to_i32 r8
561r5 = mad_f32 r5 r3 r4
562r5 = to_i32 r5
563r7 = pack r6 r7 8
564r5 = pack r8 r5 8
565r5 = pack r7 r5 16
566store32 arg(1) r5
Mike Klein267f5072019-06-03 16:27:46 -0500567
Mike Klein397fc882019-06-20 11:37:10 -0500568I32 (Naive) 8888 over 8888
56911 registers, 29 instructions:
570r0 = splat FF (3.5733111e-43)
571r1 = splat 100 (3.5873241e-43)
572loop:
573r2 = load32 arg(0)
574r3 = extract r2 0 r0
575r4 = extract r2 8 r0
576r5 = extract r2 16 r0
577r2 = extract r2 24 r0
578r6 = load32 arg(1)
579r7 = extract r6 0 r0
580r8 = extract r6 8 r0
581r9 = extract r6 16 r0
582r6 = extract r6 24 r0
583r10 = sub_i32 r1 r2
584r7 = mul_i32 r7 r10
585r7 = shr r7 8
586r7 = add_i32 r3 r7
587r8 = mul_i32 r8 r10
588r8 = shr r8 8
589r8 = add_i32 r4 r8
590r9 = mul_i32 r9 r10
591r9 = shr r9 8
592r9 = add_i32 r5 r9
593r10 = mul_i32 r6 r10
594r10 = shr r10 8
595r10 = add_i32 r2 r10
596r8 = pack r7 r8 8
597r10 = pack r9 r10 8
598r10 = pack r8 r10 16
599store32 arg(1) r10
600
Mike Klein7b7077c2019-06-03 17:10:59 -0500601I32 8888 over 8888
Mike Klein821f5e82019-06-13 10:56:51 -050060211 registers, 29 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500603r0 = splat FF (3.5733111e-43)
Mike Klein821f5e82019-06-13 10:56:51 -0500604r1 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500605loop:
606r2 = load32 arg(0)
Mike Klein342b1b22019-06-13 16:43:18 -0500607r3 = bit_and r2 r0
608r4 = bytes r2 2
609r5 = bytes r2 3
Mike Klein35389082019-06-13 11:29:26 -0500610r2 = shr r2 24
Mike Klein821f5e82019-06-13 10:56:51 -0500611r6 = load32 arg(1)
Mike Klein342b1b22019-06-13 16:43:18 -0500612r7 = bit_and r6 r0
613r8 = bytes r6 2
614r9 = bytes r6 3
Mike Klein35389082019-06-13 11:29:26 -0500615r6 = shr r6 24
Mike Klein821f5e82019-06-13 10:56:51 -0500616r10 = sub_i32 r1 r2
Mike Klein35389082019-06-13 11:29:26 -0500617r7 = mul_i16x2 r7 r10
Mike Klein821f5e82019-06-13 10:56:51 -0500618r7 = shr r7 8
Mike Klein3f593792019-06-12 12:54:52 -0500619r7 = add_i32 r3 r7
Mike Klein35389082019-06-13 11:29:26 -0500620r8 = mul_i16x2 r8 r10
Mike Klein821f5e82019-06-13 10:56:51 -0500621r8 = shr r8 8
622r8 = add_i32 r4 r8
Mike Klein35389082019-06-13 11:29:26 -0500623r9 = mul_i16x2 r9 r10
Mike Klein821f5e82019-06-13 10:56:51 -0500624r9 = shr r9 8
625r9 = add_i32 r5 r9
Mike Klein35389082019-06-13 11:29:26 -0500626r10 = mul_i16x2 r6 r10
Mike Klein821f5e82019-06-13 10:56:51 -0500627r10 = shr r10 8
628r10 = add_i32 r2 r10
629r8 = pack r7 r8 8
630r10 = pack r9 r10 8
631r10 = pack r8 r10 16
632store32 arg(1) r10
633
634I32 (SWAR) 8888 over 8888
Mike Klein2b7b2a22019-06-23 20:35:28 -04006356 registers, 15 instructions:
Mike Klein7f061fb2019-06-13 13:12:38 -0500636r0 = splat 1000100 (2.3510604e-38)
637r1 = splat FF00FF (2.3418409e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500638loop:
Mike Klein2b7b2a22019-06-23 20:35:28 -0400639r2 = load32 arg(0)
640r3 = bytes r2 404
641r3 = sub_i16x2 r0 r3
642r4 = load32 arg(1)
643r5 = bit_and r4 r1
644r4 = shr_i16x2 r4 8
645r5 = mul_i16x2 r5 r3
Mike Klein4c4945a2019-06-13 15:51:39 -0500646r5 = shr_i16x2 r5 8
Mike Klein2b7b2a22019-06-23 20:35:28 -0400647r3 = mul_i16x2 r4 r3
648r3 = bit_clear r3 r1
649r3 = bit_or r5 r3
650r3 = add_i32 r2 r3
651store32 arg(1) r3
Mike Klein7b7077c2019-06-03 17:10:59 -0500652