/*---------------------------------------------------------------*/
/*--- begin host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2010-2013 OpenWorks GbR
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
   where the instruction selectors cannot generate code in-line.
   These are purely back-end entities and cannot be seen/referenced
   from IR. */

#include "libvex_basictypes.h"
#include "host_generic_simd128.h"


/* Primitive helpers always take args of the real type (signed vs
   unsigned) but return an unsigned result, so there's no conversion
   weirdness when stuffing results back in the V128 union fields,
   which are all unsigned. */
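/* As used below, V128 is a union that overlays byte (w8[16]), halfword
   (w16[8]), word (w32[4]) and doubleword (w64[2]) views on a single
   128-bit value, so each helper can pick whichever lane width it needs. */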

static inline UInt mul32 ( Int xx, Int yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   return toUInt(t);
}

static inline UInt max32S ( Int xx, Int yy )
{
   return toUInt((xx > yy) ? xx : yy);
}

static inline UInt min32S ( Int xx, Int yy )
{
   return toUInt((xx < yy) ? xx : yy);
}

static inline UInt max32U ( UInt xx, UInt yy )
{
   return toUInt((xx > yy) ? xx : yy);
}

static inline UInt min32U ( UInt xx, UInt yy )
{
   return toUInt((xx < yy) ? xx : yy);
}

static inline UShort max16U ( UShort xx, UShort yy )
{
   return toUShort((xx > yy) ? xx : yy);
}

static inline UShort min16U ( UShort xx, UShort yy )
{
   return toUShort((xx < yy) ? xx : yy);
}

static inline UChar max8S ( Char xx, Char yy )
{
   return toUChar((xx > yy) ? xx : yy);
}

static inline UChar min8S ( Char xx, Char yy )
{
   return toUChar((xx < yy) ? xx : yy);
}

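/* 64-bit lane comparisons, following the usual SIMD convention: the
   result is all ones (0xFF..FF) when the relation holds, zero otherwise. */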
static inline ULong cmpEQ64 ( Long xx, Long yy )
{
   return (((Long)xx) == ((Long)yy))
             ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
}

static inline ULong cmpGT64S ( Long xx, Long yy )
{
   return (((Long)xx) > ((Long)yy))
             ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
}

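/* Arithmetic (sign-propagating) right shifts, done by casting the
   unsigned value to the signed type of the same width before shifting. */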
static inline ULong sar64 ( ULong v, UInt n )
{
   return ((Long)v) >> n;
}

static inline UChar sar8 ( UChar v, UInt n )
{
   return toUChar(((Char)v) >> n);
}

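/* Narrow a signed 32-bit value to an unsigned 16-bit value with
   saturation: negative inputs clamp to 0, inputs above 65535 clamp
   to 65535. */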
static inline UShort qnarrow32Sto16U ( UInt xx0 )
{
   Int xx = (Int)xx0;
   if (xx < 0) xx = 0;
   if (xx > 65535) xx = 65535;
   return (UShort)xx;
}

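/* Plain truncating narrows (no saturation). */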
static inline UShort narrow32to16 ( UInt xx )
{
   return (UShort)xx;
}

static inline UChar narrow16to8 ( UShort xx )
{
   return (UChar)xx;
}


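/* The h_generic_calc_* functions below operate on whole V128 values,
   computing each lane of the result independently. */
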
void VEX_REGPARM(3)
   h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
                            V128* argL, V128* argR )
{
   res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
   res->w32[1] = mul32(argL->w32[1], argR->w32[1]);
   res->w32[2] = mul32(argL->w32[2], argR->w32[2]);
   res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
}

void VEX_REGPARM(3)
   h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
   res->w32[1] = max32S(argL->w32[1], argR->w32[1]);
   res->w32[2] = max32S(argL->w32[2], argR->w32[2]);
   res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
}

void VEX_REGPARM(3)
   h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
   res->w32[1] = min32S(argL->w32[1], argR->w32[1]);
   res->w32[2] = min32S(argL->w32[2], argR->w32[2]);
   res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
}

void VEX_REGPARM(3)
   h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
   res->w32[1] = max32U(argL->w32[1], argR->w32[1]);
   res->w32[2] = max32U(argL->w32[2], argR->w32[2]);
   res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
}

void VEX_REGPARM(3)
   h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
   res->w32[1] = min32U(argL->w32[1], argR->w32[1]);
   res->w32[2] = min32U(argL->w32[2], argR->w32[2]);
   res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
}

void VEX_REGPARM(3)
   h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
   res->w16[1] = max16U(argL->w16[1], argR->w16[1]);
   res->w16[2] = max16U(argL->w16[2], argR->w16[2]);
   res->w16[3] = max16U(argL->w16[3], argR->w16[3]);
   res->w16[4] = max16U(argL->w16[4], argR->w16[4]);
   res->w16[5] = max16U(argL->w16[5], argR->w16[5]);
   res->w16[6] = max16U(argL->w16[6], argR->w16[6]);
   res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
}

void VEX_REGPARM(3)
   h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
   res->w16[1] = min16U(argL->w16[1], argR->w16[1]);
   res->w16[2] = min16U(argL->w16[2], argR->w16[2]);
   res->w16[3] = min16U(argL->w16[3], argR->w16[3]);
   res->w16[4] = min16U(argL->w16[4], argR->w16[4]);
   res->w16[5] = min16U(argL->w16[5], argR->w16[5]);
   res->w16[6] = min16U(argL->w16[6], argR->w16[6]);
   res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
}

void VEX_REGPARM(3)
   h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
   res->w8[ 1] = max8S(argL->w8[ 1], argR->w8[ 1]);
   res->w8[ 2] = max8S(argL->w8[ 2], argR->w8[ 2]);
   res->w8[ 3] = max8S(argL->w8[ 3], argR->w8[ 3]);
   res->w8[ 4] = max8S(argL->w8[ 4], argR->w8[ 4]);
   res->w8[ 5] = max8S(argL->w8[ 5], argR->w8[ 5]);
   res->w8[ 6] = max8S(argL->w8[ 6], argR->w8[ 6]);
   res->w8[ 7] = max8S(argL->w8[ 7], argR->w8[ 7]);
   res->w8[ 8] = max8S(argL->w8[ 8], argR->w8[ 8]);
   res->w8[ 9] = max8S(argL->w8[ 9], argR->w8[ 9]);
   res->w8[10] = max8S(argL->w8[10], argR->w8[10]);
   res->w8[11] = max8S(argL->w8[11], argR->w8[11]);
   res->w8[12] = max8S(argL->w8[12], argR->w8[12]);
   res->w8[13] = max8S(argL->w8[13], argR->w8[13]);
   res->w8[14] = max8S(argL->w8[14], argR->w8[14]);
   res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
}

void VEX_REGPARM(3)
   h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
   res->w8[ 1] = min8S(argL->w8[ 1], argR->w8[ 1]);
   res->w8[ 2] = min8S(argL->w8[ 2], argR->w8[ 2]);
   res->w8[ 3] = min8S(argL->w8[ 3], argR->w8[ 3]);
   res->w8[ 4] = min8S(argL->w8[ 4], argR->w8[ 4]);
   res->w8[ 5] = min8S(argL->w8[ 5], argR->w8[ 5]);
   res->w8[ 6] = min8S(argL->w8[ 6], argR->w8[ 6]);
   res->w8[ 7] = min8S(argL->w8[ 7], argR->w8[ 7]);
   res->w8[ 8] = min8S(argL->w8[ 8], argR->w8[ 8]);
   res->w8[ 9] = min8S(argL->w8[ 9], argR->w8[ 9]);
   res->w8[10] = min8S(argL->w8[10], argR->w8[10]);
   res->w8[11] = min8S(argL->w8[11], argR->w8[11]);
   res->w8[12] = min8S(argL->w8[12], argR->w8[12]);
   res->w8[13] = min8S(argL->w8[13], argR->w8[13]);
   res->w8[14] = min8S(argL->w8[14], argR->w8[14]);
   res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
}

void VEX_REGPARM(3)
   h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
                              V128* argL, V128* argR )
{
   res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
   res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
}

void VEX_REGPARM(3)
   h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
   res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]);
}

/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  In fact, given the
   semantics of these primops (Sar64x2, etc), it is an error if we
   are ever given an out-of-range shift amount.
*/
void /*not-regparm*/
   h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
                             V128* argL, UInt nn)
{
   /* vassert(nn < 64); */
   nn &= 63;
   res->w64[0] = sar64(argL->w64[0], nn);
   res->w64[1] = sar64(argL->w64[1], nn);
}

void /*not-regparm*/
   h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
                             V128* argL, UInt nn)
{
   /* vassert(nn < 8); */
   nn &= 7;
   res->w8[ 0] = sar8(argL->w8[ 0], nn);
   res->w8[ 1] = sar8(argL->w8[ 1], nn);
   res->w8[ 2] = sar8(argL->w8[ 2], nn);
   res->w8[ 3] = sar8(argL->w8[ 3], nn);
   res->w8[ 4] = sar8(argL->w8[ 4], nn);
   res->w8[ 5] = sar8(argL->w8[ 5], nn);
   res->w8[ 6] = sar8(argL->w8[ 6], nn);
   res->w8[ 7] = sar8(argL->w8[ 7], nn);
   res->w8[ 8] = sar8(argL->w8[ 8], nn);
   res->w8[ 9] = sar8(argL->w8[ 9], nn);
   res->w8[10] = sar8(argL->w8[10], nn);
   res->w8[11] = sar8(argL->w8[11], nn);
   res->w8[12] = sar8(argL->w8[12], nn);
   res->w8[13] = sar8(argL->w8[13], nn);
   res->w8[14] = sar8(argL->w8[14], nn);
   res->w8[15] = sar8(argL->w8[15], nn);
}

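/* Binary narrowing: the two 128-bit sources are narrowed lane-by-lane
   and concatenated into one 128-bit result, with argR supplying the
   low half of the result and argL the high half. */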
void VEX_REGPARM(3)
   h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
                                         V128* argL, V128* argR )
{
   res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
   res->w16[1] = qnarrow32Sto16U(argR->w32[1]);
   res->w16[2] = qnarrow32Sto16U(argR->w32[2]);
   res->w16[3] = qnarrow32Sto16U(argR->w32[3]);
   res->w16[4] = qnarrow32Sto16U(argL->w32[0]);
   res->w16[5] = qnarrow32Sto16U(argL->w32[1]);
   res->w16[6] = qnarrow32Sto16U(argL->w32[2]);
   res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
}

void VEX_REGPARM(3)
   h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res,
                                      V128* argL, V128* argR )
{
   res->w8[ 0] = narrow16to8(argR->w16[0]);
   res->w8[ 1] = narrow16to8(argR->w16[1]);
   res->w8[ 2] = narrow16to8(argR->w16[2]);
   res->w8[ 3] = narrow16to8(argR->w16[3]);
   res->w8[ 4] = narrow16to8(argR->w16[4]);
   res->w8[ 5] = narrow16to8(argR->w16[5]);
   res->w8[ 6] = narrow16to8(argR->w16[6]);
   res->w8[ 7] = narrow16to8(argR->w16[7]);
   res->w8[ 8] = narrow16to8(argL->w16[0]);
   res->w8[ 9] = narrow16to8(argL->w16[1]);
   res->w8[10] = narrow16to8(argL->w16[2]);
   res->w8[11] = narrow16to8(argL->w16[3]);
   res->w8[12] = narrow16to8(argL->w16[4]);
   res->w8[13] = narrow16to8(argL->w16[5]);
   res->w8[14] = narrow16to8(argL->w16[6]);
   res->w8[15] = narrow16to8(argL->w16[7]);
}

void VEX_REGPARM(3)
   h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
                                      V128* argL, V128* argR )
{
   res->w16[0] = narrow32to16(argR->w32[0]);
   res->w16[1] = narrow32to16(argR->w32[1]);
   res->w16[2] = narrow32to16(argR->w32[2]);
   res->w16[3] = narrow32to16(argR->w32[3]);
   res->w16[4] = narrow32to16(argL->w32[0]);
   res->w16[5] = narrow32to16(argL->w32[1]);
   res->w16[6] = narrow32to16(argL->w32[2]);
   res->w16[7] = narrow32to16(argL->w32[3]);
}

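/* Permute the 32-bit lanes of argL: lane i of the result is the lane
   of argL selected by the low two bits of lane i of argR. */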
void VEX_REGPARM(3)
   h_generic_calc_Perm32x4 ( /*OUT*/V128* res,
                             V128* argL, V128* argR )
{
   res->w32[0] = argL->w32[ argR->w32[0] & 3 ];
   res->w32[1] = argL->w32[ argR->w32[1] & 3 ];
   res->w32[2] = argL->w32[ argR->w32[2] & 3 ];
   res->w32[3] = argL->w32[ argR->w32[3] & 3 ];
}

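/* Collect the most significant bit of each of the 16 bytes of a 128-bit
   value (passed as two 64-bit halves, high then low) into a 16-bit mask:
   bit i of the result is the MSB of byte i. */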
UInt /*not-regparm*/
   h_generic_calc_GetMSBs8x16 ( ULong w64hi, ULong w64lo )
{
   UInt r = 0;
   if (w64hi & (1ULL << (64-1))) r |= (1<<15);
   if (w64hi & (1ULL << (56-1))) r |= (1<<14);
   if (w64hi & (1ULL << (48-1))) r |= (1<<13);
   if (w64hi & (1ULL << (40-1))) r |= (1<<12);
   if (w64hi & (1ULL << (32-1))) r |= (1<<11);
   if (w64hi & (1ULL << (24-1))) r |= (1<<10);
   if (w64hi & (1ULL << (16-1))) r |= (1<<9);
   if (w64hi & (1ULL << ( 8-1))) r |= (1<<8);
   if (w64lo & (1ULL << (64-1))) r |= (1<<7);
   if (w64lo & (1ULL << (56-1))) r |= (1<<6);
   if (w64lo & (1ULL << (48-1))) r |= (1<<5);
   if (w64lo & (1ULL << (40-1))) r |= (1<<4);
   if (w64lo & (1ULL << (32-1))) r |= (1<<3);
   if (w64lo & (1ULL << (24-1))) r |= (1<<2);
   if (w64lo & (1ULL << (16-1))) r |= (1<<1);
   if (w64lo & (1ULL << ( 8-1))) r |= (1<<0);
   return r;
}

/*---------------------------------------------------------------*/
/*--- end host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/