
/*---------------------------------------------------------------*/
/*---                                                         ---*/
/*--- This file (host-generic/h_generic_simd64.c) is          ---*/
/*--- Copyright (c) 2005 OpenWorks LLP.  All rights reserved. ---*/
/*---                                                         ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of LibVEX, a library for dynamic binary
   instrumentation and translation.

   Copyright (C) 2004-2005 OpenWorks, LLP.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; Version 2 dated June 1991 of the
   license.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or liability
   for damages.  See the GNU General Public License for more details.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA.
*/

/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
   where the instruction selectors cannot generate code in-line.
   These are purely back-end entities and cannot be seen/referenced
   from IR. */

#include "libvex_basictypes.h"
#include "host-generic/h_generic_simd64.h"



/* Tuple/select functions for 32x2 vectors. */

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UInt sel32x2_1 ( ULong w64 ) {
   return 0xFFFFFFFF & toUInt(w64 >> 32);
}
static inline UInt sel32x2_0 ( ULong w64 ) {
   return 0xFFFFFFFF & toUInt(w64);
}


/* Tuple/select functions for 16x4 vectors.  gcc is pretty hopeless
   with 64-bit shifts so we give it a hand. */

static inline ULong mk16x4 ( UShort w3, UShort w2,
                             UShort w1, UShort w0 ) {
   UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2);
   UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0);
   return mk32x2(hi32, lo32);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(0xFFFF & (hi32 >> 16));
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(0xFFFF & hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUShort(0xFFFF & (lo32 >> 16));
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUShort(0xFFFF & lo32);
}

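/* Illustrative note (added commentary, not from the original sources):
   lane 0 always sits in the least significant bits of the ULong.  For
   example, mk16x4(0x0123, 0x4567, 0x89AB, 0xCDEF) == 0x0123456789ABCDEF,
   and sel16x4_3 of that value gives back 0x0123 while sel16x4_0 gives
   0xCDEF. */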

/* Tuple/select functions for 8x8 vectors. */

static inline ULong mk8x8 ( UChar w7, UChar w6,
                            UChar w5, UChar w4,
                            UChar w3, UChar w2,
                            UChar w1, UChar w0 ) {
   UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16)
               | (((UInt)w5) << 8) | (((UInt)w4) << 0);
   UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
               | (((UInt)w1) << 8) | (((UInt)w0) << 0);
   return mk32x2(hi32, lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 24));
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 16));
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 8));
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 0));
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 24));
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 16));
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 8));
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 0));
}


/* Scalar helpers. */

static inline Short qadd16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) + ((Int)yy);
   if (t < -32768) t = -32768;
   if (t > 32767) t = 32767;
   return (Short)t;
}
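
/* Worked example (added commentary, not from the original sources):
   qadd16S(30000, 10000) would wrap to -25536 under plain 16-bit
   addition; the clamp above returns 32767 instead.  Likewise
   qadd16S(-30000, -10000) returns -32768 rather than wrapping. */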

static inline Char qadd8S ( Char xx, Char yy )
{
   Int t = ((Int)xx) + ((Int)yy);
   if (t < -128) t = -128;
   if (t > 127) t = 127;
   return (Char)t;
}

static inline UShort qadd16U ( UShort xx, UShort yy )
{
   UInt t = ((UInt)xx) + ((UInt)yy);
   if (t > 0xFFFF) t = 0xFFFF;
   return (UShort)t;
}

static inline UChar qadd8U ( UChar xx, UChar yy )
{
   UInt t = ((UInt)xx) + ((UInt)yy);
   if (t > 0xFF) t = 0xFF;
   return (UChar)t;
}

static inline Short qsub16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < -32768) t = -32768;
   if (t > 32767) t = 32767;
   return (Short)t;
}

static inline Char qsub8S ( Char xx, Char yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < -128) t = -128;
   if (t > 127) t = 127;
   return (Char)t;
}

static inline UShort qsub16U ( UShort xx, UShort yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < 0) t = 0;
   if (t > 0xFFFF) t = 0xFFFF;
   return (UShort)t;
}

static inline UChar qsub8U ( UChar xx, UChar yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < 0) t = 0;
   if (t > 0xFF) t = 0xFF;
   return (UChar)t;
}

static inline Short mul16 ( Short xx, Short yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   return (Short)t;
}

static inline Short mulhi16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   t >>=/*s*/ 16;
   return (Short)t;
}

static inline UShort mulhi16U ( UShort xx, UShort yy )
{
   UInt t = ((UInt)xx) * ((UInt)yy);
   t >>=/*u*/ 16;
   return (UShort)t;
}

static inline UInt cmpeq32 ( UInt xx, UInt yy )
{
   return xx==yy ? 0xFFFFFFFF : 0;
}

static inline UShort cmpeq16 ( UShort xx, UShort yy )
{
   return toUShort(xx==yy ? 0xFFFF : 0);
}

static inline UChar cmpeq8 ( UChar xx, UChar yy )
{
   return toUChar(xx==yy ? 0xFF : 0);
}

static inline UInt cmpgt32S ( Int xx, Int yy )
{
   return xx>yy ? 0xFFFFFFFF : 0;
}

static inline UShort cmpgt16S ( Short xx, Short yy )
{
   return toUShort(xx>yy ? 0xFFFF : 0);
}

static inline UChar cmpgt8S ( Char xx, Char yy )
{
   return toUChar(xx>yy ? 0xFF : 0);
}

static inline UInt cmpnez32 ( UInt xx )
{
   return xx==0 ? 0 : 0xFFFFFFFF;
}

static inline UShort cmpnez16 ( UShort xx )
{
   return toUShort(xx==0 ? 0 : 0xFFFF);
}

static inline UChar cmpnez8 ( UChar xx )
{
   return toUChar(xx==0 ? 0 : 0xFF);
}

static inline Short qnarrow32Sto16 ( UInt xx0 )
{
   Int xx = (Int)xx0;
   if (xx < -32768) xx = -32768;
   if (xx > 32767) xx = 32767;
   return (Short)xx;
}

static inline Char qnarrow16Sto8 ( UShort xx0 )
{
   Short xx = (Short)xx0;
   if (xx < -128) xx = -128;
   if (xx > 127) xx = 127;
   return (Char)xx;
}

static inline UChar qnarrow16Uto8 ( UShort xx0 )
{
   Short xx = (Short)xx0;
   if (xx < 0) xx = 0;
   if (xx > 255) xx = 255;
   return (UChar)xx;
}
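
/* Worked examples (added commentary, not from the original sources):
   qnarrow32Sto16(100000) == 32767 and qnarrow32Sto16(-100000) == -32768;
   qnarrow16Sto8(300) == 127 and qnarrow16Sto8(-300) == -128;
   qnarrow16Uto8 clamps against both ends of the unsigned byte range,
   so qnarrow16Uto8(0xFFFB) == 0 (0xFFFB reads as -5 once viewed as a
   Short) and qnarrow16Uto8(300) == 255. */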

/* shifts: we don't care about out-of-range ones, since
   that is dealt with at a higher level. */

static inline UShort shl16 ( UShort v, UInt n )
{
   return toUShort(v << n);
}

static inline UShort shr16 ( UShort v, UInt n )
{
   return toUShort((((UShort)v) >> n));
}

static inline UShort sar16 ( UShort v, UInt n )
{
   return toUShort(((Short)v) >> n);
}

static inline UInt shl32 ( UInt v, UInt n )
{
   return v << n;
}

static inline UInt shr32 ( UInt v, UInt n )
{
   return (((UInt)v) >> n);
}

static inline UInt sar32 ( UInt v, UInt n )
{
   return ((Int)v) >> n;
}

static inline UChar avg8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi + yyi + 1) >> 1;
   return (UChar)r;
}

static inline UShort avg16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi + yyi + 1) >> 1;
   return (UShort)r;
}
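
/* Added note (not from the original sources): the "+ 1" makes the
   average round up, as e.g. x86 PAVGB/PAVGW do.  So avg8U(1, 2) == 2,
   and avg8U(0xFF, 0xFF) == 0xFF with no overflow, since the sum is
   formed in a UInt. */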

static inline Short max16S ( Short xx, Short yy )
{
   return toUShort((xx > yy) ? xx : yy);
}

static inline UChar max8U ( UChar xx, UChar yy )
{
   return toUChar((xx > yy) ? xx : yy);
}

static inline Short min16S ( Short xx, Short yy )
{
   return toUShort((xx < yy) ? xx : yy);
}

static inline UChar min8U ( UChar xx, UChar yy )
{
   return toUChar((xx < yy) ? xx : yy);
}

/* ----------------------------------------------------- */
/* Start of the externally visible functions.  These simply
   implement the corresponding IR primops. */
/* ----------------------------------------------------- */

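/* Illustrative usage sketch (added for exposition; not part of the
   original sources, and the packed values below are made up).  Each
   argument and result is simply two 32-bit, four 16-bit or eight
   8-bit lanes packed into a ULong, lane 0 in the least significant
   bits. */
#if 0
static ULong example_Add16x4_usage ( void )
{
   ULong xx = 0x0001000200030004ULL;   /* lanes {1,2,3,4}  */
   ULong yy = 0xFFFF000100020003ULL;   /* lanes {-1,1,2,3} */
   /* Result is 0x0000000300050007, i.e. lanes {0,3,5,7}: the top lane
      wraps around, since this is the non-saturating variant. */
   return h_generic_calc_Add16x4(xx, yy);
}
#endif
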
/* ------------ Normal addition ------------ */

ULong h_generic_calc_Add32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             sel32x2_1(xx) + sel32x2_1(yy),
             sel32x2_0(xx) + sel32x2_0(yy)
          );
}

ULong h_generic_calc_Add16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             toUShort( sel16x4_3(xx) + sel16x4_3(yy) ),
             toUShort( sel16x4_2(xx) + sel16x4_2(yy) ),
             toUShort( sel16x4_1(xx) + sel16x4_1(yy) ),
             toUShort( sel16x4_0(xx) + sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Add8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             toUChar( sel8x8_7(xx) + sel8x8_7(yy) ),
             toUChar( sel8x8_6(xx) + sel8x8_6(yy) ),
             toUChar( sel8x8_5(xx) + sel8x8_5(yy) ),
             toUChar( sel8x8_4(xx) + sel8x8_4(yy) ),
             toUChar( sel8x8_3(xx) + sel8x8_3(yy) ),
             toUChar( sel8x8_2(xx) + sel8x8_2(yy) ),
             toUChar( sel8x8_1(xx) + sel8x8_1(yy) ),
             toUChar( sel8x8_0(xx) + sel8x8_0(yy) )
          );
}

/* ------------ Saturating addition ------------ */

ULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qadd16S( sel16x4_3(xx), sel16x4_3(yy) ),
             qadd16S( sel16x4_2(xx), sel16x4_2(yy) ),
             qadd16S( sel16x4_1(xx), sel16x4_1(yy) ),
             qadd16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qadd8S( sel8x8_7(xx), sel8x8_7(yy) ),
             qadd8S( sel8x8_6(xx), sel8x8_6(yy) ),
             qadd8S( sel8x8_5(xx), sel8x8_5(yy) ),
             qadd8S( sel8x8_4(xx), sel8x8_4(yy) ),
             qadd8S( sel8x8_3(xx), sel8x8_3(yy) ),
             qadd8S( sel8x8_2(xx), sel8x8_2(yy) ),
             qadd8S( sel8x8_1(xx), sel8x8_1(yy) ),
             qadd8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qadd16U( sel16x4_3(xx), sel16x4_3(yy) ),
             qadd16U( sel16x4_2(xx), sel16x4_2(yy) ),
             qadd16U( sel16x4_1(xx), sel16x4_1(yy) ),
             qadd16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qadd8U( sel8x8_7(xx), sel8x8_7(yy) ),
             qadd8U( sel8x8_6(xx), sel8x8_6(yy) ),
             qadd8U( sel8x8_5(xx), sel8x8_5(yy) ),
             qadd8U( sel8x8_4(xx), sel8x8_4(yy) ),
             qadd8U( sel8x8_3(xx), sel8x8_3(yy) ),
             qadd8U( sel8x8_2(xx), sel8x8_2(yy) ),
             qadd8U( sel8x8_1(xx), sel8x8_1(yy) ),
             qadd8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}
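
/* Added note (not from the original sources): in the unsigned
   saturating variants each lane clamps at the top of its range
   instead of wrapping, e.g. qadd8U(0xF0, 0x20) == 0xFF whereas the
   plain Add8x8 lane result would be 0x10. */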

/* ------------ Normal subtraction ------------ */

ULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             sel32x2_1(xx) - sel32x2_1(yy),
             sel32x2_0(xx) - sel32x2_0(yy)
          );
}

ULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             toUShort( sel16x4_3(xx) - sel16x4_3(yy) ),
             toUShort( sel16x4_2(xx) - sel16x4_2(yy) ),
             toUShort( sel16x4_1(xx) - sel16x4_1(yy) ),
             toUShort( sel16x4_0(xx) - sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
             toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
             toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
             toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
             toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
             toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
             toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
             toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
          );
}

/* ------------ Saturating subtraction ------------ */

ULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
             qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
             qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
             qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
             qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
             qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
             qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
             qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
             qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
             qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
             qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
             qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
             qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
             qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
             qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
             qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
             qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
             qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
             qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
             qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
             qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

/* ------------ Multiplication ------------ */

ULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mul16( sel16x4_3(xx), sel16x4_3(yy) ),
             mul16( sel16x4_2(xx), sel16x4_2(yy) ),
             mul16( sel16x4_1(xx), sel16x4_1(yy) ),
             mul16( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
             mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
             mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
             mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
             mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
             mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
             mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}
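
/* Added note (not from the original sources): the MulHi variants
   return bits [31:16] of the full 32-bit lane product, e.g.
   mulhi16S(0x4000, 0x4000) == 0x1000 (16384 * 16384 == 0x10000000)
   and mulhi16U(0xFFFF, 0xFFFF) == 0xFFFE. */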

/* ------------ Comparison ------------ */

ULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
             cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
          );
}
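
/* Worked example (added commentary, not from the original sources):
   each comparison produces a per-lane mask, all ones for true and all
   zeroes for false.  For instance
   CmpEQ32x2(0x0000000500000005, 0x0000000600000005) == 0x00000000FFFFFFFF:
   only the lower lane matches. */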

ULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
             cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
             cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
             cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
             cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
             cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
             cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
             cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
             cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
             cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
             cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
{
   return mk32x2(
             cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
             cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
             cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
             cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
             cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
             cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
             cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
             cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
             cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
             cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
             cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
             cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
{
   return mk32x2(
             cmpnez32( sel32x2_1(xx) ),
             cmpnez32( sel32x2_0(xx) )
          );
}

ULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
{
   return mk16x4(
             cmpnez16( sel16x4_3(xx) ),
             cmpnez16( sel16x4_2(xx) ),
             cmpnez16( sel16x4_1(xx) ),
             cmpnez16( sel16x4_0(xx) )
          );
}

ULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
{
   return mk8x8(
             cmpnez8( sel8x8_7(xx) ),
             cmpnez8( sel8x8_6(xx) ),
             cmpnez8( sel8x8_5(xx) ),
             cmpnez8( sel8x8_4(xx) ),
             cmpnez8( sel8x8_3(xx) ),
             cmpnez8( sel8x8_2(xx) ),
             cmpnez8( sel8x8_1(xx) ),
             cmpnez8( sel8x8_0(xx) )
          );
}

/* ------------ Saturating narrowing ------------ */

ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb )
{
   UInt d = sel32x2_1(aa);
   UInt c = sel32x2_0(aa);
   UInt b = sel32x2_1(bb);
   UInt a = sel32x2_0(bb);
   return mk16x4(
             qnarrow32Sto16(d),
             qnarrow32Sto16(c),
             qnarrow32Sto16(b),
             qnarrow32Sto16(a)
          );
}

ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             qnarrow16Sto8(h),
             qnarrow16Sto8(g),
             qnarrow16Sto8(f),
             qnarrow16Sto8(e),
             qnarrow16Sto8(d),
             qnarrow16Sto8(c),
             qnarrow16Sto8(b),
             qnarrow16Sto8(a)
          );
}

ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             qnarrow16Uto8(h),
             qnarrow16Uto8(g),
             qnarrow16Uto8(f),
             qnarrow16Uto8(e),
             qnarrow16Uto8(d),
             qnarrow16Uto8(c),
             qnarrow16Uto8(b),
             qnarrow16Uto8(a)
          );
}
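
/* Added note (not from the original sources): in each QNarrow variant
   the narrowed lanes of aa form the upper half of the result and the
   narrowed lanes of bb the lower half; every lane is clamped on the
   way down, e.g. a 16-bit lane holding 300 narrows to 127 in the
   signed case and to 255 in the unsigned case. */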

/* ------------ Interleaving ------------ */

ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             sel8x8_7(aa),
             sel8x8_7(bb),
             sel8x8_6(aa),
             sel8x8_6(bb),
             sel8x8_5(aa),
             sel8x8_5(bb),
             sel8x8_4(aa),
             sel8x8_4(bb)
          );
}

ULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             sel8x8_3(aa),
             sel8x8_3(bb),
             sel8x8_2(aa),
             sel8x8_2(bb),
             sel8x8_1(aa),
             sel8x8_1(bb),
             sel8x8_0(aa),
             sel8x8_0(bb)
          );
}
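
/* Worked example (added commentary, not from the original sources):
   with aa == 0x0706050403020100 and bb == 0x1716151413121110,
   InterleaveHI8x8 gives 0x0717061605150414 and InterleaveLO8x8 gives
   0x0313021201110010; bytes of aa and bb alternate, aa first, drawn
   from the high (respectively low) halves of the inputs. */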

ULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_3(aa),
             sel16x4_3(bb),
             sel16x4_2(aa),
             sel16x4_2(bb)
          );
}

ULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_1(aa),
             sel16x4_1(bb),
             sel16x4_0(aa),
             sel16x4_0(bb)
          );
}

ULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
{
   return mk32x2(
             sel32x2_1(aa),
             sel32x2_1(bb)
          );
}

ULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
{
   return mk32x2(
             sel32x2_0(aa),
             sel32x2_0(bb)
          );
}

/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  Given the semantics
   of these primops (ShlN16x4, etc), it is in fact an error if we are
   ever given an out-of-range shift amount.
*/
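
/* Worked example (added commentary, not from the original sources):
   ShlN16x4(0x0001000200030004, 2) shifts every 16-bit lane left by
   two, giving 0x00040008000C0010; a shift amount of 18 would first be
   masked down to 2 by the "nn &= 15" below. */
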
ULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             shl32( sel32x2_1(xx), nn ),
             shl32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             shl16( sel16x4_3(xx), nn ),
             shl16( sel16x4_2(xx), nn ),
             shl16( sel16x4_1(xx), nn ),
             shl16( sel16x4_0(xx), nn )
          );
}

ULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             shr32( sel32x2_1(xx), nn ),
             shr32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             shr16( sel16x4_3(xx), nn ),
             shr16( sel16x4_2(xx), nn ),
             shr16( sel16x4_1(xx), nn ),
             shr16( sel16x4_0(xx), nn )
          );
}

ULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             sar32( sel32x2_1(xx), nn ),
             sar32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             sar16( sel16x4_3(xx), nn ),
             sar16( sel16x4_2(xx), nn ),
             sar16( sel16x4_1(xx), nn ),
             sar16( sel16x4_0(xx), nn )
          );
}

/* ------------ Averaging ------------ */

ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
             avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
             avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
             avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
             avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
             avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
             avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
             avg8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
             avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
             avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
             avg16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

/* ------------ max/min ------------ */

ULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             max16S( sel16x4_3(xx), sel16x4_3(yy) ),
             max16S( sel16x4_2(xx), sel16x4_2(yy) ),
             max16S( sel16x4_1(xx), sel16x4_1(yy) ),
             max16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             max8U( sel8x8_7(xx), sel8x8_7(yy) ),
             max8U( sel8x8_6(xx), sel8x8_6(yy) ),
             max8U( sel8x8_5(xx), sel8x8_5(yy) ),
             max8U( sel8x8_4(xx), sel8x8_4(yy) ),
             max8U( sel8x8_3(xx), sel8x8_3(yy) ),
             max8U( sel8x8_2(xx), sel8x8_2(yy) ),
             max8U( sel8x8_1(xx), sel8x8_1(yy) ),
             max8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             min16S( sel16x4_3(xx), sel16x4_3(yy) ),
             min16S( sel16x4_2(xx), sel16x4_2(yy) ),
             min16S( sel16x4_1(xx), sel16x4_1(yy) ),
             min16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             min8U( sel8x8_7(xx), sel8x8_7(yy) ),
             min8U( sel8x8_6(xx), sel8x8_6(yy) ),
             min8U( sel8x8_5(xx), sel8x8_5(yy) ),
             min8U( sel8x8_4(xx), sel8x8_4(yy) ),
             min8U( sel8x8_3(xx), sel8x8_3(yy) ),
             min8U( sel8x8_2(xx), sel8x8_2(yy) ),
             min8U( sel8x8_1(xx), sel8x8_1(yy) ),
             min8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}


/*---------------------------------------------------------------*/
/*--- end                    host-generic/h_generic_simd64.c  ---*/
/*---------------------------------------------------------------*/