blob: c6745404547143118e9f19fc42a7aefa13f61c5f [file] [log] [blame]
Mike Klein267f5072019-06-03 16:27:46 -05001A8 over A8
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600212 values:
3 v0 = load8 arg(0)
4 v1 = to_f32 v0
5 v2 = mul_f32 v1 3B808081 (0.0039215689)
6 v3 = load8 arg(1)
7 v4 = to_f32 v3
8 v5 = mul_f32 v4 3B808081 (0.0039215689)
9 v6 = splat 3F800000 (1)
10 v7 = sub_f32 v6 v2
11 v8 = mad_f32 v5 v7 v2
12 v9 = mul_f32 v8 437F0000 (255)
13 v10 = round v9
14 store8 arg(1) v10
Mike Klein22ea7e92019-06-10 12:05:48 -050015
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600164 registers, 12 instructions:
17r0 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -050018loop:
Mike Klein8c1e0ef2019-11-12 09:07:23 -060019 r1 = load8 arg(0)
20 r1 = to_f32 r1
21 r1 = mul_f32 r1 3B808081 (0.0039215689)
22 r2 = load8 arg(1)
23 r2 = to_f32 r2
24 r2 = mul_f32 r2 3B808081 (0.0039215689)
25 r3 = sub_f32 r0 r1
26 r1 = mad_f32 r2 r3 r1
27 r1 = mul_f32 r1 437F0000 (255)
28 r1 = round r1
29 store8 arg(1) r1
30
31A8 over G8
3217 values:
33 v0 = load8 arg(1)
34 v1 = to_f32 v0
35 v2 = mul_f32 v1 3B808081 (0.0039215689)
36 v3 = load8 arg(0)
37 v4 = to_f32 v3
38 v5 = mul_f32 v4 3B808081 (0.0039215689)
39 v6 = splat 3F800000 (1)
40 v7 = sub_f32 v6 v5
41 v8 = mul_f32 v2 v7
42 v9 = splat 3E59B3D0 (0.21259999)
43 v10 = splat 3F371759 (0.71520001)
44 v11 = mul_f32 v8 3D93DD98 (0.0722)
45 v12 = mad_f32 v8 v10 v11
46 v13 = mad_f32 v8 v9 v12
47 v14 = mul_f32 v13 437F0000 (255)
48 v15 = round v14
49 store8 arg(1) v15
50
515 registers, 17 instructions:
52r0 = splat 3F800000 (1)
53r1 = splat 3E59B3D0 (0.21259999)
54r2 = splat 3F371759 (0.71520001)
55loop:
56 r3 = load8 arg(1)
Mike Klein4135cf02019-11-08 14:18:06 -060057 r3 = to_f32 r3
Mike Klein8c1e0ef2019-11-12 09:07:23 -060058 r3 = mul_f32 r3 3B808081 (0.0039215689)
59 r4 = load8 arg(0)
Mike Kleine8356ad2019-11-04 20:14:24 -060060 r4 = to_f32 r4
Mike Klein8c1e0ef2019-11-12 09:07:23 -060061 r4 = mul_f32 r4 3B808081 (0.0039215689)
62 r4 = sub_f32 r0 r4
63 r4 = mul_f32 r3 r4
64 r3 = mul_f32 r4 3D93DD98 (0.0722)
65 r3 = mad_f32 r4 r2 r3
66 r3 = mad_f32 r4 r1 r3
67 r3 = mul_f32 r3 437F0000 (255)
Mike Klein4135cf02019-11-08 14:18:06 -060068 r3 = round r3
69 store8 arg(1) r3
Mike Klein267f5072019-06-03 16:27:46 -050070
Mike Klein754bad32019-06-05 10:47:46 -050071A8 over RGBA_8888
Mike Klein8c1e0ef2019-11-12 09:07:23 -06007235 values:
73 v0 = load32 arg(1)
74 v1 = splat FF (3.5733111e-43)
75 v2 = extract v0 0 v1
76 v3 = to_f32 v2
77 v4 = mul_f32 v3 3B808081 (0.0039215689)
78 v5 = load8 arg(0)
79 v6 = to_f32 v5
80 v7 = mul_f32 v6 3B808081 (0.0039215689)
81 v8 = splat 3F800000 (1)
82 v9 = sub_f32 v8 v7
83 v10 = mul_f32 v4 v9
84 v11 = mul_f32 v10 437F0000 (255)
85 v12 = round v11
86 v13 = extract v0 8 v1
87 v14 = to_f32 v13
88 v15 = mul_f32 v14 3B808081 (0.0039215689)
89 v16 = mul_f32 v15 v9
90 v17 = mul_f32 v16 437F0000 (255)
91 v18 = round v17
92 v19 = pack v12 v18 8
93 v20 = extract v0 16 v1
94 v21 = to_f32 v20
95 v22 = mul_f32 v21 3B808081 (0.0039215689)
96 v23 = mul_f32 v22 v9
97 v24 = mul_f32 v23 437F0000 (255)
98 v25 = round v24
99 v26 = extract v0 24 v1
100 v27 = to_f32 v26
101 v28 = mul_f32 v27 3B808081 (0.0039215689)
102 v29 = mad_f32 v28 v9 v7
103 v30 = mul_f32 v29 437F0000 (255)
104 v31 = round v30
105 v32 = pack v25 v31 8
106 v33 = pack v19 v32 16
107 store32 arg(1) v33
Mike Klein22ea7e92019-06-10 12:05:48 -0500108
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001097 registers, 35 instructions:
110r0 = splat FF (3.5733111e-43)
111r1 = splat 3F800000 (1)
Mike Klein754bad32019-06-05 10:47:46 -0500112loop:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600113 r2 = load32 arg(1)
114 r3 = extract r2 0 r0
115 r3 = to_f32 r3
116 r3 = mul_f32 r3 3B808081 (0.0039215689)
117 r4 = load8 arg(0)
Mike Klein4135cf02019-11-08 14:18:06 -0600118 r4 = to_f32 r4
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600119 r4 = mul_f32 r4 3B808081 (0.0039215689)
120 r5 = sub_f32 r1 r4
121 r3 = mul_f32 r3 r5
122 r3 = mul_f32 r3 437F0000 (255)
123 r3 = round r3
124 r6 = extract r2 8 r0
125 r6 = to_f32 r6
126 r6 = mul_f32 r6 3B808081 (0.0039215689)
127 r6 = mul_f32 r6 r5
128 r6 = mul_f32 r6 437F0000 (255)
Mike Klein4135cf02019-11-08 14:18:06 -0600129 r6 = round r6
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600130 r6 = pack r3 r6 8
131 r3 = extract r2 16 r0
132 r3 = to_f32 r3
133 r3 = mul_f32 r3 3B808081 (0.0039215689)
134 r3 = mul_f32 r3 r5
135 r3 = mul_f32 r3 437F0000 (255)
136 r3 = round r3
137 r2 = extract r2 24 r0
138 r2 = to_f32 r2
139 r2 = mul_f32 r2 3B808081 (0.0039215689)
140 r4 = mad_f32 r2 r5 r4
141 r4 = mul_f32 r4 437F0000 (255)
142 r4 = round r4
143 r4 = pack r3 r4 8
144 r4 = pack r6 r4 16
145 store32 arg(1) r4
Mike Klein754bad32019-06-05 10:47:46 -0500146
147G8 over A8
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001489 values:
Mike Kleind48488b2019-10-22 12:27:58 -0500149 v0 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600150 v1 = load8 arg(1)
151 v2 = to_f32 v1
152 v3 = mul_f32 v2 3B808081 (0.0039215689)
153 v4 = sub_f32 v0 v0
154 v5 = mad_f32 v3 v4 v0
155 v6 = mul_f32 v5 437F0000 (255)
156 v7 = round v6
157 store8 arg(1) v7
Mike Klein22ea7e92019-06-10 12:05:48 -0500158
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001593 registers, 9 instructions:
Mike Kleind48488b2019-10-22 12:27:58 -0500160r0 = splat 3F800000 (1)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600161r1 = sub_f32 r0 r0
Mike Klein754bad32019-06-05 10:47:46 -0500162loop:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600163 r2 = load8 arg(1)
164 r2 = to_f32 r2
165 r2 = mul_f32 r2 3B808081 (0.0039215689)
166 r2 = mad_f32 r2 r1 r0
167 r2 = mul_f32 r2 437F0000 (255)
168 r2 = round r2
169 store8 arg(1) r2
Mike Klein754bad32019-06-05 10:47:46 -0500170
171G8 over G8
Mike Klein8c1e0ef2019-11-12 09:07:23 -060017217 values:
173 v0 = load8 arg(0)
174 v1 = to_f32 v0
175 v2 = mul_f32 v1 3B808081 (0.0039215689)
176 v3 = load8 arg(1)
Mike Kleind48488b2019-10-22 12:27:58 -0500177 v4 = to_f32 v3
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600178 v5 = mul_f32 v4 3B808081 (0.0039215689)
179 v6 = splat 3F800000 (1)
180 v7 = sub_f32 v6 v6
181 v8 = mad_f32 v5 v7 v2
182 v9 = splat 3E59B3D0 (0.21259999)
183 v10 = splat 3F371759 (0.71520001)
184 v11 = mul_f32 v8 3D93DD98 (0.0722)
185 v12 = mad_f32 v8 v10 v11
186 v13 = mad_f32 v8 v9 v12
187 v14 = mul_f32 v13 437F0000 (255)
188 v15 = round v14
189 store8 arg(1) v15
Mike Klein22ea7e92019-06-10 12:05:48 -0500190
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001915 registers, 17 instructions:
192r0 = splat 3F800000 (1)
193r0 = sub_f32 r0 r0
194r1 = splat 3E59B3D0 (0.21259999)
195r2 = splat 3F371759 (0.71520001)
Mike Klein754bad32019-06-05 10:47:46 -0500196loop:
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600197 r3 = load8 arg(0)
198 r3 = to_f32 r3
199 r3 = mul_f32 r3 3B808081 (0.0039215689)
200 r4 = load8 arg(1)
Mike Klein4135cf02019-11-08 14:18:06 -0600201 r4 = to_f32 r4
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600202 r4 = mul_f32 r4 3B808081 (0.0039215689)
203 r3 = mad_f32 r4 r0 r3
204 r4 = mul_f32 r3 3D93DD98 (0.0722)
205 r4 = mad_f32 r3 r2 r4
206 r4 = mad_f32 r3 r1 r4
207 r4 = mul_f32 r4 437F0000 (255)
Mike Klein4135cf02019-11-08 14:18:06 -0600208 r4 = round r4
209 store8 arg(1) r4
Mike Klein754bad32019-06-05 10:47:46 -0500210
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600211G8 over RGBA_8888
21235 values:
213 v0 = load8 arg(0)
214 v1 = to_f32 v0
215 v2 = mul_f32 v1 3B808081 (0.0039215689)
216 v3 = load32 arg(1)
217 v4 = splat FF (3.5733111e-43)
218 v5 = extract v3 0 v4
219 v6 = to_f32 v5
220 v7 = mul_f32 v6 3B808081 (0.0039215689)
221 v8 = splat 3F800000 (1)
222 v9 = sub_f32 v8 v8
223 v10 = mad_f32 v7 v9 v2
224 v11 = mul_f32 v10 437F0000 (255)
225 v12 = round v11
226 v13 = extract v3 8 v4
227 v14 = to_f32 v13
228 v15 = mul_f32 v14 3B808081 (0.0039215689)
229 v16 = mad_f32 v15 v9 v2
230 v17 = mul_f32 v16 437F0000 (255)
Mike Klein4135cf02019-11-08 14:18:06 -0600231 v18 = round v17
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600232 v19 = pack v12 v18 8
233 v20 = extract v3 16 v4
234 v21 = to_f32 v20
235 v22 = mul_f32 v21 3B808081 (0.0039215689)
236 v23 = mad_f32 v22 v9 v2
237 v24 = mul_f32 v23 437F0000 (255)
238 v25 = round v24
239 v26 = extract v3 24 v4
240 v27 = to_f32 v26
241 v28 = mul_f32 v27 3B808081 (0.0039215689)
242 v29 = mad_f32 v28 v9 v8
243 v30 = mul_f32 v29 437F0000 (255)
244 v31 = round v30
245 v32 = pack v25 v31 8
246 v33 = pack v19 v32 16
247 store32 arg(1) v33
Mike Klein22ea7e92019-06-10 12:05:48 -0500248
Mike Klein8c1e0ef2019-11-12 09:07:23 -06002497 registers, 35 instructions:
250r0 = splat FF (3.5733111e-43)
251r1 = splat 3F800000 (1)
252r2 = sub_f32 r1 r1
253loop:
254 r3 = load8 arg(0)
255 r3 = to_f32 r3
256 r3 = mul_f32 r3 3B808081 (0.0039215689)
257 r4 = load32 arg(1)
258 r5 = extract r4 0 r0
259 r5 = to_f32 r5
260 r5 = mul_f32 r5 3B808081 (0.0039215689)
261 r5 = mad_f32 r5 r2 r3
262 r5 = mul_f32 r5 437F0000 (255)
263 r5 = round r5
264 r6 = extract r4 8 r0
265 r6 = to_f32 r6
266 r6 = mul_f32 r6 3B808081 (0.0039215689)
267 r6 = mad_f32 r6 r2 r3
268 r6 = mul_f32 r6 437F0000 (255)
269 r6 = round r6
270 r6 = pack r5 r6 8
271 r5 = extract r4 16 r0
272 r5 = to_f32 r5
273 r5 = mul_f32 r5 3B808081 (0.0039215689)
274 r3 = mad_f32 r5 r2 r3
275 r3 = mul_f32 r3 437F0000 (255)
276 r3 = round r3
277 r4 = extract r4 24 r0
278 r4 = to_f32 r4
279 r4 = mul_f32 r4 3B808081 (0.0039215689)
280 r4 = mad_f32 r4 r2 r1
281 r4 = mul_f32 r4 437F0000 (255)
282 r4 = round r4
283 r4 = pack r3 r4 8
284 r4 = pack r6 r4 16
285 store32 arg(1) r4
286
287RGBA_8888 over A8
28814 values:
289 v0 = load32 arg(0)
290 v1 = splat FF (3.5733111e-43)
291 v2 = extract v0 24 v1
292 v3 = to_f32 v2
293 v4 = mul_f32 v3 3B808081 (0.0039215689)
294 v5 = load8 arg(1)
295 v6 = to_f32 v5
296 v7 = mul_f32 v6 3B808081 (0.0039215689)
297 v8 = splat 3F800000 (1)
298 v9 = sub_f32 v8 v4
299 v10 = mad_f32 v7 v9 v4
300 v11 = mul_f32 v10 437F0000 (255)
301 v12 = round v11
302 store8 arg(1) v12
303
3045 registers, 14 instructions:
305r0 = splat FF (3.5733111e-43)
306r1 = splat 3F800000 (1)
307loop:
308 r2 = load32 arg(0)
309 r2 = extract r2 24 r0
310 r2 = to_f32 r2
311 r2 = mul_f32 r2 3B808081 (0.0039215689)
312 r3 = load8 arg(1)
313 r3 = to_f32 r3
314 r3 = mul_f32 r3 3B808081 (0.0039215689)
315 r4 = sub_f32 r1 r2
316 r2 = mad_f32 r3 r4 r2
317 r2 = mul_f32 r2 437F0000 (255)
318 r2 = round r2
319 store8 arg(1) r2
320
321RGBA_8888 over G8
32230 values:
323 v0 = load32 arg(0)
324 v1 = splat FF (3.5733111e-43)
325 v2 = extract v0 0 v1
326 v3 = to_f32 v2
327 v4 = mul_f32 v3 3B808081 (0.0039215689)
328 v5 = load8 arg(1)
329 v6 = to_f32 v5
330 v7 = mul_f32 v6 3B808081 (0.0039215689)
331 v8 = extract v0 24 v1
332 v9 = to_f32 v8
333 v10 = mul_f32 v9 3B808081 (0.0039215689)
334 v11 = splat 3F800000 (1)
335 v12 = sub_f32 v11 v10
336 v13 = mad_f32 v7 v12 v4
337 v14 = splat 3E59B3D0 (0.21259999)
338 v15 = extract v0 8 v1
339 v16 = to_f32 v15
340 v17 = mul_f32 v16 3B808081 (0.0039215689)
341 v18 = mad_f32 v7 v12 v17
342 v19 = splat 3F371759 (0.71520001)
343 v20 = extract v0 16 v1
344 v21 = to_f32 v20
345 v22 = mul_f32 v21 3B808081 (0.0039215689)
346 v23 = mad_f32 v7 v12 v22
347 v24 = mul_f32 v23 3D93DD98 (0.0722)
348 v25 = mad_f32 v18 v19 v24
349 v26 = mad_f32 v13 v14 v25
350 v27 = mul_f32 v26 437F0000 (255)
351 v28 = round v27
352 store8 arg(1) v28
353
3549 registers, 30 instructions:
355r0 = splat FF (3.5733111e-43)
356r1 = splat 3F800000 (1)
357r2 = splat 3E59B3D0 (0.21259999)
358r3 = splat 3F371759 (0.71520001)
Mike Kleina6307322019-06-07 15:44:26 -0500359loop:
Mike Klein4135cf02019-11-08 14:18:06 -0600360 r4 = load32 arg(0)
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600361 r5 = extract r4 0 r0
Mike Kleine8356ad2019-11-04 20:14:24 -0600362 r5 = to_f32 r5
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600363 r5 = mul_f32 r5 3B808081 (0.0039215689)
364 r6 = load8 arg(1)
Mike Klein4135cf02019-11-08 14:18:06 -0600365 r6 = to_f32 r6
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600366 r6 = mul_f32 r6 3B808081 (0.0039215689)
367 r7 = extract r4 24 r0
368 r7 = to_f32 r7
369 r7 = mul_f32 r7 3B808081 (0.0039215689)
370 r7 = sub_f32 r1 r7
371 r5 = mad_f32 r6 r7 r5
372 r8 = extract r4 8 r0
373 r8 = to_f32 r8
374 r8 = mul_f32 r8 3B808081 (0.0039215689)
375 r8 = mad_f32 r6 r7 r8
376 r4 = extract r4 16 r0
377 r4 = to_f32 r4
378 r4 = mul_f32 r4 3B808081 (0.0039215689)
379 r4 = mad_f32 r6 r7 r4
380 r4 = mul_f32 r4 3D93DD98 (0.0722)
381 r4 = mad_f32 r8 r3 r4
382 r4 = mad_f32 r5 r2 r4
383 r4 = mul_f32 r4 437F0000 (255)
384 r4 = round r4
385 store8 arg(1) r4
386
387RGBA_8888 over RGBA_8888
38845 values:
389 v0 = load32 arg(0)
390 v1 = splat FF (3.5733111e-43)
391 v2 = extract v0 0 v1
392 v3 = to_f32 v2
393 v4 = mul_f32 v3 3B808081 (0.0039215689)
394 v5 = load32 arg(1)
395 v6 = extract v5 0 v1
396 v7 = to_f32 v6
397 v8 = mul_f32 v7 3B808081 (0.0039215689)
398 v9 = extract v0 24 v1
399 v10 = to_f32 v9
400 v11 = mul_f32 v10 3B808081 (0.0039215689)
401 v12 = splat 3F800000 (1)
402 v13 = sub_f32 v12 v11
403 v14 = mad_f32 v8 v13 v4
404 v15 = mul_f32 v14 437F0000 (255)
405 v16 = round v15
406 v17 = extract v0 8 v1
407 v18 = to_f32 v17
408 v19 = mul_f32 v18 3B808081 (0.0039215689)
409 v20 = extract v5 8 v1
410 v21 = to_f32 v20
411 v22 = mul_f32 v21 3B808081 (0.0039215689)
412 v23 = mad_f32 v22 v13 v19
413 v24 = mul_f32 v23 437F0000 (255)
414 v25 = round v24
415 v26 = pack v16 v25 8
416 v27 = extract v0 16 v1
417 v28 = to_f32 v27
418 v29 = mul_f32 v28 3B808081 (0.0039215689)
419 v30 = extract v5 16 v1
420 v31 = to_f32 v30
421 v32 = mul_f32 v31 3B808081 (0.0039215689)
422 v33 = mad_f32 v32 v13 v29
423 v34 = mul_f32 v33 437F0000 (255)
424 v35 = round v34
425 v36 = extract v5 24 v1
426 v37 = to_f32 v36
427 v38 = mul_f32 v37 3B808081 (0.0039215689)
428 v39 = mad_f32 v38 v13 v11
429 v40 = mul_f32 v39 437F0000 (255)
430 v41 = round v40
431 v42 = pack v35 v41 8
432 v43 = pack v26 v42 16
433 store32 arg(1) v43
434
4359 registers, 45 instructions:
436r0 = splat FF (3.5733111e-43)
437r1 = splat 3F800000 (1)
438loop:
439 r2 = load32 arg(0)
440 r3 = extract r2 0 r0
441 r3 = to_f32 r3
442 r3 = mul_f32 r3 3B808081 (0.0039215689)
443 r4 = load32 arg(1)
444 r5 = extract r4 0 r0
445 r5 = to_f32 r5
446 r5 = mul_f32 r5 3B808081 (0.0039215689)
447 r6 = extract r2 24 r0
448 r6 = to_f32 r6
449 r6 = mul_f32 r6 3B808081 (0.0039215689)
450 r7 = sub_f32 r1 r6
451 r3 = mad_f32 r5 r7 r3
452 r3 = mul_f32 r3 437F0000 (255)
453 r3 = round r3
454 r5 = extract r2 8 r0
455 r5 = to_f32 r5
456 r5 = mul_f32 r5 3B808081 (0.0039215689)
457 r8 = extract r4 8 r0
458 r8 = to_f32 r8
459 r8 = mul_f32 r8 3B808081 (0.0039215689)
460 r5 = mad_f32 r8 r7 r5
461 r5 = mul_f32 r5 437F0000 (255)
462 r5 = round r5
463 r5 = pack r3 r5 8
464 r2 = extract r2 16 r0
465 r2 = to_f32 r2
466 r2 = mul_f32 r2 3B808081 (0.0039215689)
467 r3 = extract r4 16 r0
468 r3 = to_f32 r3
469 r3 = mul_f32 r3 3B808081 (0.0039215689)
470 r2 = mad_f32 r3 r7 r2
471 r2 = mul_f32 r2 437F0000 (255)
472 r2 = round r2
473 r4 = extract r4 24 r0
474 r4 = to_f32 r4
475 r4 = mul_f32 r4 3B808081 (0.0039215689)
476 r6 = mad_f32 r4 r7 r6
477 r6 = mul_f32 r6 437F0000 (255)
478 r6 = round r6
479 r6 = pack r2 r6 8
480 r6 = pack r5 r6 16
481 store32 arg(1) r6
Mike Klein267f5072019-06-03 16:27:46 -0500482
Mike Klein397fc882019-06-20 11:37:10 -0500483I32 (Naive) 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050048429 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500485 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500486 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500487 v2 = extract v0 0 v1
Mike Kleind48488b2019-10-22 12:27:58 -0500488 v3 = load32 arg(1)
489 v4 = extract v3 0 v1
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500490 v5 = extract v0 24 v1
Mike Kleind48488b2019-10-22 12:27:58 -0500491 v6 = splat 100 (3.5873241e-43)
492 v7 = sub_i32 v6 v5
493 v8 = mul_i32 v4 v7
494 v9 = shr_i32 v8 8
495 v10 = add_i32 v2 v9
496 v11 = extract v0 8 v1
497 v12 = extract v3 8 v1
498 v13 = mul_i32 v12 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500499 v14 = shr_i32 v13 8
Mike Kleind48488b2019-10-22 12:27:58 -0500500 v15 = add_i32 v11 v14
501 v16 = pack v10 v15 8
502 v17 = extract v0 16 v1
503 v18 = extract v3 16 v1
504 v19 = mul_i32 v18 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500505 v20 = shr_i32 v19 8
Mike Kleind48488b2019-10-22 12:27:58 -0500506 v21 = add_i32 v17 v20
507 v22 = extract v3 24 v1
508 v23 = mul_i32 v22 v7
509 v24 = shr_i32 v23 8
510 v25 = add_i32 v5 v24
511 v26 = pack v21 v25 8
512 v27 = pack v16 v26 16
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500513 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500514
Mike Kleind48488b2019-10-22 12:27:58 -05005159 registers, 29 instructions:
Mike Klein397fc882019-06-20 11:37:10 -0500516r0 = splat FF (3.5733111e-43)
517r1 = splat 100 (3.5873241e-43)
518loop:
Mike Kleine8356ad2019-11-04 20:14:24 -0600519 r2 = load32 arg(0)
520 r3 = extract r2 0 r0
521 r4 = load32 arg(1)
522 r5 = extract r4 0 r0
523 r6 = extract r2 24 r0
524 r7 = sub_i32 r1 r6
525 r5 = mul_i32 r5 r7
526 r5 = shr_i32 r5 8
527 r5 = add_i32 r3 r5
528 r3 = extract r2 8 r0
529 r8 = extract r4 8 r0
530 r8 = mul_i32 r8 r7
531 r8 = shr_i32 r8 8
532 r8 = add_i32 r3 r8
533 r8 = pack r5 r8 8
534 r2 = extract r2 16 r0
535 r5 = extract r4 16 r0
536 r5 = mul_i32 r5 r7
537 r5 = shr_i32 r5 8
538 r5 = add_i32 r2 r5
539 r4 = extract r4 24 r0
540 r7 = mul_i32 r4 r7
541 r7 = shr_i32 r7 8
542 r7 = add_i32 r6 r7
543 r7 = pack r5 r7 8
544 r7 = pack r8 r7 16
545 store32 arg(1) r7
Mike Klein397fc882019-06-20 11:37:10 -0500546
Mike Klein7b7077c2019-06-03 17:10:59 -0500547I32 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050054829 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500549 v0 = load32 arg(0)
Mike Klein5e533c92019-07-22 13:44:54 -0500550 v1 = splat FF (3.5733111e-43)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500551 v2 = bit_and v0 v1
Mike Kleind48488b2019-10-22 12:27:58 -0500552 v3 = load32 arg(1)
553 v4 = bit_and v3 v1
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500554 v5 = shr_i32 v0 24
Mike Kleind48488b2019-10-22 12:27:58 -0500555 v6 = splat 100 (3.5873241e-43)
556 v7 = sub_i32 v6 v5
557 v8 = mul_i16x2 v4 v7
558 v9 = shr_i32 v8 8
559 v10 = add_i32 v2 v9
560 v11 = bytes v0 2
561 v12 = bytes v3 2
562 v13 = mul_i16x2 v12 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500563 v14 = shr_i32 v13 8
Mike Kleind48488b2019-10-22 12:27:58 -0500564 v15 = add_i32 v11 v14
565 v16 = pack v10 v15 8
566 v17 = bytes v0 3
567 v18 = bytes v3 3
568 v19 = mul_i16x2 v18 v7
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500569 v20 = shr_i32 v19 8
Mike Kleind48488b2019-10-22 12:27:58 -0500570 v21 = add_i32 v17 v20
571 v22 = shr_i32 v3 24
572 v23 = mul_i16x2 v22 v7
573 v24 = shr_i32 v23 8
574 v25 = add_i32 v5 v24
575 v26 = pack v21 v25 8
576 v27 = pack v16 v26 16
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500577 store32 arg(1) v27
Mike Kleinaab45b52019-07-02 15:39:23 -0500578
Mike Kleind48488b2019-10-22 12:27:58 -05005799 registers, 29 instructions:
Mike Klein754bad32019-06-05 10:47:46 -0500580r0 = splat FF (3.5733111e-43)
Mike Klein821f5e82019-06-13 10:56:51 -0500581r1 = splat 100 (3.5873241e-43)
Mike Klein754bad32019-06-05 10:47:46 -0500582loop:
Mike Kleine8356ad2019-11-04 20:14:24 -0600583 r2 = load32 arg(0)
584 r3 = bit_and r2 r0
585 r4 = load32 arg(1)
586 r5 = bit_and r4 r0
587 r6 = shr_i32 r2 24
588 r7 = sub_i32 r1 r6
589 r5 = mul_i16x2 r5 r7
590 r5 = shr_i32 r5 8
591 r5 = add_i32 r3 r5
592 r3 = bytes r2 2
593 r8 = bytes r4 2
594 r8 = mul_i16x2 r8 r7
595 r8 = shr_i32 r8 8
596 r8 = add_i32 r3 r8
597 r8 = pack r5 r8 8
598 r2 = bytes r2 3
599 r5 = bytes r4 3
600 r5 = mul_i16x2 r5 r7
601 r5 = shr_i32 r5 8
602 r5 = add_i32 r2 r5
603 r4 = shr_i32 r4 24
604 r7 = mul_i16x2 r4 r7
605 r7 = shr_i32 r7 8
606 r7 = add_i32 r6 r7
607 r7 = pack r5 r7 8
608 r7 = pack r8 r7 16
609 store32 arg(1) r7
Mike Klein821f5e82019-06-13 10:56:51 -0500610
611I32 (SWAR) 8888 over 8888
Mike Kleinaab45b52019-07-02 15:39:23 -050061215 values:
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500613 v0 = load32 arg(0)
614 v1 = bytes v0 404
Mike Klein5e533c92019-07-22 13:44:54 -0500615 v2 = splat 1000100 (2.3510604e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500616 v3 = sub_i16x2 v2 v1
617 v4 = load32 arg(1)
Mike Klein5e533c92019-07-22 13:44:54 -0500618 v5 = splat FF00FF (2.3418409e-38)
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500619 v6 = bit_and v4 v5
Mike Kleind48488b2019-10-22 12:27:58 -0500620 v7 = mul_i16x2 v6 v3
621 v8 = shr_i16x2 v7 8
622 v9 = shr_i16x2 v4 8
623 v10 = mul_i16x2 v9 v3
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500624 v11 = bit_clear v10 v5
Mike Kleind48488b2019-10-22 12:27:58 -0500625 v12 = bit_or v8 v11
Mike Kleinc2fb3b42019-07-17 12:09:09 -0500626 v13 = add_i32 v0 v12
627 store32 arg(1) v13
Mike Kleinaab45b52019-07-02 15:39:23 -0500628
Mike Klein2b7b2a22019-06-23 20:35:28 -04006296 registers, 15 instructions:
Mike Klein7f061fb2019-06-13 13:12:38 -0500630r0 = splat 1000100 (2.3510604e-38)
631r1 = splat FF00FF (2.3418409e-38)
Mike Klein821f5e82019-06-13 10:56:51 -0500632loop:
Mike Kleine8356ad2019-11-04 20:14:24 -0600633 r2 = load32 arg(0)
634 r3 = bytes r2 404
635 r3 = sub_i16x2 r0 r3
636 r4 = load32 arg(1)
637 r5 = bit_and r4 r1
638 r5 = mul_i16x2 r5 r3
639 r5 = shr_i16x2 r5 8
640 r4 = shr_i16x2 r4 8
641 r3 = mul_i16x2 r4 r3
642 r3 = bit_clear r3 r1
643 r3 = bit_or r5 r3
644 r3 = add_i32 r2 r3
645 store32 arg(1) r3
Mike Klein7b7077c2019-06-03 17:10:59 -0500646
Mike Kleinf9963112019-08-08 15:13:25 -04006476 values:
Mike Klein0f61c122019-10-16 10:46:01 -0500648 v0 = splat 1 (1.4012985e-45)
649 v1 = splat 2 (2.8025969e-45)
Mike Kleinf9963112019-08-08 15:13:25 -0400650 v2 = add_i32 v0 v1
651 v3 = load32 arg(0)
652 v4 = mul_i32 v3 v2
653 store32 arg(0) v4
654
6552 registers, 6 instructions:
656r0 = splat 1 (1.4012985e-45)
657r1 = splat 2 (2.8025969e-45)
658r1 = add_i32 r0 r1
659loop:
Mike Kleine8356ad2019-11-04 20:14:24 -0600660 r0 = load32 arg(0)
661 r0 = mul_i32 r0 r1
662 store32 arg(0) r0
Mike Kleinf9963112019-08-08 15:13:25 -0400663
Mike Kleind48488b2019-10-22 12:27:58 -050066419 values:
665 v0 = splat FF (3.5733111e-43)
666 v1 = load32 arg(0)
667 v2 = extract v1 0 v0
668 v3 = load32 arg(1)
669 v4 = extract v3 0 v0
670 v5 = add_i32 v2 v4
671 v6 = extract v1 8 v0
672 v7 = extract v3 8 v0
673 v8 = add_i32 v6 v7
674 v9 = pack v5 v8 8
675 v10 = extract v1 16 v0
676 v11 = extract v3 16 v0
677 v12 = add_i32 v10 v11
678 v13 = extract v1 24 v0
679 v14 = extract v3 24 v0
680 v15 = add_i32 v13 v14
681 v16 = pack v12 v15 8
682 v17 = pack v9 v16 16
683 store32 arg(1) v17
684
6856 registers, 19 instructions:
686r0 = splat FF (3.5733111e-43)
687loop:
Mike Kleine8356ad2019-11-04 20:14:24 -0600688 r1 = load32 arg(0)
689 r2 = extract r1 0 r0
690 r3 = load32 arg(1)
691 r4 = extract r3 0 r0
692 r4 = add_i32 r2 r4
693 r2 = extract r1 8 r0
694 r5 = extract r3 8 r0
695 r5 = add_i32 r2 r5
696 r5 = pack r4 r5 8
697 r4 = extract r1 16 r0
698 r2 = extract r3 16 r0
699 r2 = add_i32 r4 r2
700 r1 = extract r1 24 r0
701 r3 = extract r3 24 r0
702 r3 = add_i32 r1 r3
703 r3 = pack r2 r3 8
704 r3 = pack r5 r3 16
705 store32 arg(1) r3
Mike Kleind48488b2019-10-22 12:27:58 -0500706