
/*---------------------------------------------------------------*/
/*--- begin                                host_generic_simd64.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
   where the instruction selectors cannot generate code in-line.
   These are purely back-end entities and cannot be seen/referenced
   from IR. */

#include "libvex_basictypes.h"
#include "host_generic_simd64.h"



/* Tuple/select functions for 32x2 vectors. */

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UInt sel32x2_1 ( ULong w64 ) {
   return 0xFFFFFFFF & toUInt(w64 >> 32);
}
static inline UInt sel32x2_0 ( ULong w64 ) {
   return 0xFFFFFFFF & toUInt(w64);
}


/* Tuple/select functions for 16x4 vectors.  gcc is pretty hopeless
   with 64-bit shifts so we give it a hand. */

static inline ULong mk16x4 ( UShort w3, UShort w2,
                             UShort w1, UShort w0 ) {
   UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2);
   UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0);
   return mk32x2(hi32, lo32);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(0xFFFF & (hi32 >> 16));
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(0xFFFF & hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUShort(0xFFFF & (lo32 >> 16));
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUShort(0xFFFF & lo32);
}

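/* Illustrative note (added commentary, not from the original sources):
   lanes are packed most-significant-first.  For example, for
   w64 = 0x0001000200030004ULL:
      sel16x4_3(w64) == 0x0001,  sel16x4_2(w64) == 0x0002,
      sel16x4_1(w64) == 0x0003,  sel16x4_0(w64) == 0x0004,
   and mk16x4(0x0001, 0x0002, 0x0003, 0x0004) rebuilds the same value.
   The 32x2 helpers above and the 8x8 helpers below use the same
   convention. */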

/* Tuple/select functions for 8x8 vectors. */

static inline ULong mk8x8 ( UChar w7, UChar w6,
                            UChar w5, UChar w4,
                            UChar w3, UChar w2,
                            UChar w1, UChar w0 ) {
   UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16)
               | (((UInt)w5) << 8) | (((UInt)w4) << 0);
   UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
               | (((UInt)w1) << 8) | (((UInt)w0) << 0);
   return mk32x2(hi32, lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 24));
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 16));
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 8));
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 0));
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 24));
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 16));
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 8));
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 0));
}

static inline UChar index8x8 ( ULong w64, UChar ix ) {
   ix &= 7;
   return toUChar((w64 >> (8*ix)) & 0xFF);
}


/* Scalar helpers. */

static inline Short qadd16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) + ((Int)yy);
   if (t < -32768) t = -32768;
   if (t > 32767) t = 32767;
   return (Short)t;
}

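/* Worked example (illustrative, not from the original sources): signed
   saturation clamps the exact result to the lane's range, so
   qadd16S(30000, 30000) == 32767 rather than wrapping to -5536, and
   qadd8S(-100, -100) == -128. */
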
static inline Char qadd8S ( Char xx, Char yy )
{
   Int t = ((Int)xx) + ((Int)yy);
   if (t < -128) t = -128;
   if (t > 127) t = 127;
   return (Char)t;
}

static inline UShort qadd16U ( UShort xx, UShort yy )
{
   UInt t = ((UInt)xx) + ((UInt)yy);
   if (t > 0xFFFF) t = 0xFFFF;
   return (UShort)t;
}

static inline UChar qadd8U ( UChar xx, UChar yy )
{
   UInt t = ((UInt)xx) + ((UInt)yy);
   if (t > 0xFF) t = 0xFF;
   return (UChar)t;
}

static inline Short qsub16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < -32768) t = -32768;
   if (t > 32767) t = 32767;
   return (Short)t;
}

static inline Char qsub8S ( Char xx, Char yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < -128) t = -128;
   if (t > 127) t = 127;
   return (Char)t;
}

static inline UShort qsub16U ( UShort xx, UShort yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < 0) t = 0;
   if (t > 0xFFFF) t = 0xFFFF;
   return (UShort)t;
}

static inline UChar qsub8U ( UChar xx, UChar yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < 0) t = 0;
   if (t > 0xFF) t = 0xFF;
   return (UChar)t;
}

static inline Short mul16 ( Short xx, Short yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   return (Short)t;
}

static inline Int mul32 ( Int xx, Int yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   return (Int)t;
}

static inline Short mulhi16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   t >>=/*s*/ 16;
   return (Short)t;
}

static inline UShort mulhi16U ( UShort xx, UShort yy )
{
   UInt t = ((UInt)xx) * ((UInt)yy);
   t >>=/*u*/ 16;
   return (UShort)t;
}

static inline UInt cmpeq32 ( UInt xx, UInt yy )
{
   return xx==yy ? 0xFFFFFFFF : 0;
}

static inline UShort cmpeq16 ( UShort xx, UShort yy )
{
   return toUShort(xx==yy ? 0xFFFF : 0);
}

static inline UChar cmpeq8 ( UChar xx, UChar yy )
{
   return toUChar(xx==yy ? 0xFF : 0);
}

static inline UInt cmpgt32S ( Int xx, Int yy )
{
   return xx>yy ? 0xFFFFFFFF : 0;
}

static inline UShort cmpgt16S ( Short xx, Short yy )
{
   return toUShort(xx>yy ? 0xFFFF : 0);
}

static inline UChar cmpgt8S ( Char xx, Char yy )
{
   return toUChar(xx>yy ? 0xFF : 0);
}

static inline UInt cmpnez32 ( UInt xx )
{
   return xx==0 ? 0 : 0xFFFFFFFF;
}

static inline UShort cmpnez16 ( UShort xx )
{
   return toUShort(xx==0 ? 0 : 0xFFFF);
}

static inline UChar cmpnez8 ( UChar xx )
{
   return toUChar(xx==0 ? 0 : 0xFF);
}

static inline Short qnarrow32Sto16 ( UInt xx0 )
{
   Int xx = (Int)xx0;
   if (xx < -32768) xx = -32768;
   if (xx > 32767) xx = 32767;
   return (Short)xx;
}

static inline Char qnarrow16Sto8 ( UShort xx0 )
{
   Short xx = (Short)xx0;
   if (xx < -128) xx = -128;
   if (xx > 127) xx = 127;
   return (Char)xx;
}

static inline UChar qnarrow16Uto8 ( UShort xx0 )
{
   Short xx = (Short)xx0;
   if (xx < 0) xx = 0;
   if (xx > 255) xx = 255;
   return (UChar)xx;
}

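/* Worked example (illustrative, not from the original sources):
   qnarrow32Sto16(0x00012345) == 0x7FFF, since 74565 exceeds 32767;
   qnarrow16Sto8(0xFF00) == -128, since 0xFF00 is -256 as a Short;
   qnarrow16Uto8(0xFF00) == 0, because the input is treated as signed
   and negative values clamp to 0 (PACKUSWB-style signed-to-unsigned
   saturation). */
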
/* shifts: we don't care about out-of-range ones, since
   that is dealt with at a higher level. */

static inline UChar shl8 ( UChar v, UInt n )
{
   return toUChar(v << n);
}

static inline UChar sar8 ( UChar v, UInt n )
{
   return toUChar(((Char)v) >> n);
}

static inline UShort shl16 ( UShort v, UInt n )
{
   return toUShort(v << n);
}

static inline UShort shr16 ( UShort v, UInt n )
{
   return toUShort((((UShort)v) >> n));
}

static inline UShort sar16 ( UShort v, UInt n )
{
   return toUShort(((Short)v) >> n);
}

static inline UInt shl32 ( UInt v, UInt n )
{
   return v << n;
}

static inline UInt shr32 ( UInt v, UInt n )
{
   return (((UInt)v) >> n);
}

static inline UInt sar32 ( UInt v, UInt n )
{
   return ((Int)v) >> n;
}

static inline UChar avg8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r = (xxi + yyi + 1) >> 1;
   return (UChar)r;
}

static inline UShort avg16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r = (xxi + yyi + 1) >> 1;
   return (UShort)r;
}

static inline Short max16S ( Short xx, Short yy )
{
   return toUShort((xx > yy) ? xx : yy);
}

static inline UChar max8U ( UChar xx, UChar yy )
{
   return toUChar((xx > yy) ? xx : yy);
}

static inline Short min16S ( Short xx, Short yy )
{
   return toUShort((xx < yy) ? xx : yy);
}

static inline UChar min8U ( UChar xx, UChar yy )
{
   return toUChar((xx < yy) ? xx : yy);
}

static inline UShort hadd16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r = (xxi + yyi) >> 1;
   return (UShort)r;
}

static inline Short hadd16S ( Short xx, Short yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r = (xxi + yyi) >> 1;
   return (Short)r;
}

static inline UShort hsub16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r = (xxi - yyi) >> 1;
   return (UShort)r;
}

static inline Short hsub16S ( Short xx, Short yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r = (xxi - yyi) >> 1;
   return (Short)r;
}

static inline UChar hadd8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r = (xxi + yyi) >> 1;
   return (UChar)r;
}

static inline Char hadd8S ( Char xx, Char yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r = (xxi + yyi) >> 1;
   return (Char)r;
}

static inline UChar hsub8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r = (xxi - yyi) >> 1;
   return (UChar)r;
}

static inline Char hsub8S ( Char xx, Char yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r = (xxi - yyi) >> 1;
   return (Char)r;
}


/* ----------------------------------------------------- */
/* Start of the externally visible functions.  These simply
   implement the corresponding IR primops. */
/* ----------------------------------------------------- */

/* ------------ Normal addition ------------ */

ULong h_generic_calc_Add32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             sel32x2_1(xx) + sel32x2_1(yy),
             sel32x2_0(xx) + sel32x2_0(yy)
          );
}

ULong h_generic_calc_Add16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             toUShort( sel16x4_3(xx) + sel16x4_3(yy) ),
             toUShort( sel16x4_2(xx) + sel16x4_2(yy) ),
             toUShort( sel16x4_1(xx) + sel16x4_1(yy) ),
             toUShort( sel16x4_0(xx) + sel16x4_0(yy) )
          );
}

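/* Worked example (illustrative, not from the original sources): lanes
   are added independently and any carry out of a lane is discarded, so
   h_generic_calc_Add16x4(0x0001FFFF00030004ULL, 0x0010000100300040ULL)
   == 0x0011000000330044ULL -- lane 2 wraps from 0xFFFF + 0x0001 to
   0x0000 without disturbing lane 3. */
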
ULong h_generic_calc_Add8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             toUChar( sel8x8_7(xx) + sel8x8_7(yy) ),
             toUChar( sel8x8_6(xx) + sel8x8_6(yy) ),
             toUChar( sel8x8_5(xx) + sel8x8_5(yy) ),
             toUChar( sel8x8_4(xx) + sel8x8_4(yy) ),
             toUChar( sel8x8_3(xx) + sel8x8_3(yy) ),
             toUChar( sel8x8_2(xx) + sel8x8_2(yy) ),
             toUChar( sel8x8_1(xx) + sel8x8_1(yy) ),
             toUChar( sel8x8_0(xx) + sel8x8_0(yy) )
          );
}

/* ------------ Saturating addition ------------ */

ULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qadd16S( sel16x4_3(xx), sel16x4_3(yy) ),
             qadd16S( sel16x4_2(xx), sel16x4_2(yy) ),
             qadd16S( sel16x4_1(xx), sel16x4_1(yy) ),
             qadd16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qadd8S( sel8x8_7(xx), sel8x8_7(yy) ),
             qadd8S( sel8x8_6(xx), sel8x8_6(yy) ),
             qadd8S( sel8x8_5(xx), sel8x8_5(yy) ),
             qadd8S( sel8x8_4(xx), sel8x8_4(yy) ),
             qadd8S( sel8x8_3(xx), sel8x8_3(yy) ),
             qadd8S( sel8x8_2(xx), sel8x8_2(yy) ),
             qadd8S( sel8x8_1(xx), sel8x8_1(yy) ),
             qadd8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qadd16U( sel16x4_3(xx), sel16x4_3(yy) ),
             qadd16U( sel16x4_2(xx), sel16x4_2(yy) ),
             qadd16U( sel16x4_1(xx), sel16x4_1(yy) ),
             qadd16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qadd8U( sel8x8_7(xx), sel8x8_7(yy) ),
             qadd8U( sel8x8_6(xx), sel8x8_6(yy) ),
             qadd8U( sel8x8_5(xx), sel8x8_5(yy) ),
             qadd8U( sel8x8_4(xx), sel8x8_4(yy) ),
             qadd8U( sel8x8_3(xx), sel8x8_3(yy) ),
             qadd8U( sel8x8_2(xx), sel8x8_2(yy) ),
             qadd8U( sel8x8_1(xx), sel8x8_1(yy) ),
             qadd8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

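/* Worked example (illustrative, not from the original sources): with
   unsigned byte saturation,
   h_generic_calc_QAdd8Ux8(0xFF01FF01FF01FF01ULL, 0x0202020202020202ULL)
   == 0xFF03FF03FF03FF03ULL -- the 0xFF lanes stick at 0xFF instead of
   wrapping, while the 0x01 lanes add normally. */
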
/* ------------ Normal subtraction ------------ */

ULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             sel32x2_1(xx) - sel32x2_1(yy),
             sel32x2_0(xx) - sel32x2_0(yy)
          );
}

ULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             toUShort( sel16x4_3(xx) - sel16x4_3(yy) ),
             toUShort( sel16x4_2(xx) - sel16x4_2(yy) ),
             toUShort( sel16x4_1(xx) - sel16x4_1(yy) ),
             toUShort( sel16x4_0(xx) - sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
             toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
             toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
             toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
             toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
             toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
             toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
             toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
          );
}

/* ------------ Saturating subtraction ------------ */

ULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
             qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
             qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
             qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
             qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
             qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
             qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
             qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
             qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
             qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
             qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
             qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
             qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
             qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
             qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
             qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
             qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
             qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
             qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
             qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
             qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

/* ------------ Multiplication ------------ */

ULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mul16( sel16x4_3(xx), sel16x4_3(yy) ),
             mul16( sel16x4_2(xx), sel16x4_2(yy) ),
             mul16( sel16x4_1(xx), sel16x4_1(yy) ),
             mul16( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             mul32( sel32x2_1(xx), sel32x2_1(yy) ),
             mul32( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
             mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
             mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
             mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
             mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
             mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
             mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

/* ------------ Comparison ------------ */

ULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
             cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
             cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
             cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
             cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
             cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
             cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
             cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
             cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
             cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
             cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
             cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
{
   return mk32x2(
             cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
             cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
             cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
             cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
             cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
             cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
             cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
             cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
             cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
             cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
             cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
             cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
{
   return mk32x2(
             cmpnez32( sel32x2_1(xx) ),
             cmpnez32( sel32x2_0(xx) )
          );
}

ULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
{
   return mk16x4(
             cmpnez16( sel16x4_3(xx) ),
             cmpnez16( sel16x4_2(xx) ),
             cmpnez16( sel16x4_1(xx) ),
             cmpnez16( sel16x4_0(xx) )
          );
}

ULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
{
   return mk8x8(
             cmpnez8( sel8x8_7(xx) ),
             cmpnez8( sel8x8_6(xx) ),
             cmpnez8( sel8x8_5(xx) ),
             cmpnez8( sel8x8_4(xx) ),
             cmpnez8( sel8x8_3(xx) ),
             cmpnez8( sel8x8_2(xx) ),
             cmpnez8( sel8x8_1(xx) ),
             cmpnez8( sel8x8_0(xx) )
          );
}

/* ------------ Saturating narrowing ------------ */

ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb )
{
   UInt d = sel32x2_1(aa);
   UInt c = sel32x2_0(aa);
   UInt b = sel32x2_1(bb);
   UInt a = sel32x2_0(bb);
   return mk16x4(
             qnarrow32Sto16(d),
             qnarrow32Sto16(c),
             qnarrow32Sto16(b),
             qnarrow32Sto16(a)
          );
}

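/* Illustrative note (added commentary, not from the original sources):
   in these narrowing primops 'aa' supplies the high half of the result
   and 'bb' the low half.  For example,
   h_generic_calc_QNarrow32Sx2(0x0001000000000002ULL, 0xFFFFFFFF00008000ULL)
   == 0x7FFF0002FFFF7FFFULL: 0x00010000 saturates to 0x7FFF, 2 narrows
   to 0x0002, -1 narrows to 0xFFFF, and 32768 saturates to 0x7FFF. */
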
ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             qnarrow16Sto8(h),
             qnarrow16Sto8(g),
             qnarrow16Sto8(f),
             qnarrow16Sto8(e),
             qnarrow16Sto8(d),
             qnarrow16Sto8(c),
             qnarrow16Sto8(b),
             qnarrow16Sto8(a)
          );
}

ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             qnarrow16Uto8(h),
             qnarrow16Uto8(g),
             qnarrow16Uto8(f),
             qnarrow16Uto8(e),
             qnarrow16Uto8(d),
             qnarrow16Uto8(c),
             qnarrow16Uto8(b),
             qnarrow16Uto8(a)
          );
}

/* ------------ Interleaving ------------ */

ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             sel8x8_7(aa),
             sel8x8_7(bb),
             sel8x8_6(aa),
             sel8x8_6(bb),
             sel8x8_5(aa),
             sel8x8_5(bb),
             sel8x8_4(aa),
             sel8x8_4(bb)
          );
}

ULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             sel8x8_3(aa),
             sel8x8_3(bb),
             sel8x8_2(aa),
             sel8x8_2(bb),
             sel8x8_1(aa),
             sel8x8_1(bb),
             sel8x8_0(aa),
             sel8x8_0(bb)
          );
}

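/* Worked example (illustrative, not from the original sources): with
   aa = 0xA7A6A5A4A3A2A1A0ULL and bb = 0xB7B6B5B4B3B2B1B0ULL,
   h_generic_calc_InterleaveHI8x8(aa, bb) == 0xA7B7A6B6A5B5A4B4ULL and
   h_generic_calc_InterleaveLO8x8(aa, bb) == 0xA3B3A2B2A1B1A0B0ULL --
   together the two results form the full byte interleave of the upper
   and lower halves. */
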
ULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_3(aa),
             sel16x4_3(bb),
             sel16x4_2(aa),
             sel16x4_2(bb)
          );
}

ULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_1(aa),
             sel16x4_1(bb),
             sel16x4_0(aa),
             sel16x4_0(bb)
          );
}

ULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
{
   return mk32x2(
             sel32x2_1(aa),
             sel32x2_1(bb)
          );
}

ULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
{
   return mk32x2(
             sel32x2_0(aa),
             sel32x2_0(bb)
          );
}

/* ------------ Concatenation ------------ */

ULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_3(aa),
             sel16x4_1(aa),
             sel16x4_3(bb),
             sel16x4_1(bb)
          );
}

ULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_2(aa),
             sel16x4_0(aa),
             sel16x4_2(bb),
             sel16x4_0(bb)
          );
}

/* misc hack looking for a proper home */
ULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             index8x8(aa, sel8x8_7(bb)),
             index8x8(aa, sel8x8_6(bb)),
             index8x8(aa, sel8x8_5(bb)),
             index8x8(aa, sel8x8_4(bb)),
             index8x8(aa, sel8x8_3(bb)),
             index8x8(aa, sel8x8_2(bb)),
             index8x8(aa, sel8x8_1(bb)),
             index8x8(aa, sel8x8_0(bb))
          );
}

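/* Worked example (illustrative, not from the original sources): each
   result byte of Perm8x8 is the byte of 'aa' selected by the low three
   bits of the corresponding byte of 'bb' (index8x8 masks the index
   with 7).  For instance
   h_generic_calc_Perm8x8(0x8877665544332211ULL, 0x0703070307030703ULL)
   == 0x8844884488448844ULL. */
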
/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  Given the semantics
   of these primops (ShlN16x4, etc), it is an error if we are ever
   given an out-of-range shift amount.
*/
ULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             shl32( sel32x2_1(xx), nn ),
             shl32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             shl16( sel16x4_3(xx), nn ),
             shl16( sel16x4_2(xx), nn ),
             shl16( sel16x4_1(xx), nn ),
             shl16( sel16x4_0(xx), nn )
          );
}

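/* Worked example (illustrative, not from the original sources): lanes
   shift independently, so
   h_generic_calc_ShlN16x4(0x8001800180018001ULL, 1)
   == 0x0002000200020002ULL -- the bit shifted out of each lane's top
   is dropped rather than carried into the neighbouring lane. */
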
ULong h_generic_calc_ShlN8x8 ( ULong xx, UInt nn )
{
   /* vassert(nn < 8); */
   nn &= 7;
   return mk8x8(
             shl8( sel8x8_7(xx), nn ),
             shl8( sel8x8_6(xx), nn ),
             shl8( sel8x8_5(xx), nn ),
             shl8( sel8x8_4(xx), nn ),
             shl8( sel8x8_3(xx), nn ),
             shl8( sel8x8_2(xx), nn ),
             shl8( sel8x8_1(xx), nn ),
             shl8( sel8x8_0(xx), nn )
          );
}

ULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             shr32( sel32x2_1(xx), nn ),
             shr32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             shr16( sel16x4_3(xx), nn ),
             shr16( sel16x4_2(xx), nn ),
             shr16( sel16x4_1(xx), nn ),
             shr16( sel16x4_0(xx), nn )
          );
}

ULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             sar32( sel32x2_1(xx), nn ),
             sar32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             sar16( sel16x4_3(xx), nn ),
             sar16( sel16x4_2(xx), nn ),
             sar16( sel16x4_1(xx), nn ),
             sar16( sel16x4_0(xx), nn )
          );
}

ULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn )
{
   /* vassert(nn < 8); */
   nn &= 7;
   return mk8x8(
             sar8( sel8x8_7(xx), nn ),
             sar8( sel8x8_6(xx), nn ),
             sar8( sel8x8_5(xx), nn ),
             sar8( sel8x8_4(xx), nn ),
             sar8( sel8x8_3(xx), nn ),
             sar8( sel8x8_2(xx), nn ),
             sar8( sel8x8_1(xx), nn ),
             sar8( sel8x8_0(xx), nn )
          );
}

/* ------------ Averaging ------------ */

ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
             avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
             avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
             avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
             avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
             avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
             avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
             avg8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
             avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
             avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
             avg16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

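/* Illustrative note (added commentary, not from the original sources):
   these averages round upwards, as in x86 PAVGB/PAVGW: avg8U(1, 2) == 2
   because (1 + 2 + 1) >> 1 == 2, whereas a truncating average would
   give 1. */
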
/* ------------ max/min ------------ */

ULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             max16S( sel16x4_3(xx), sel16x4_3(yy) ),
             max16S( sel16x4_2(xx), sel16x4_2(yy) ),
             max16S( sel16x4_1(xx), sel16x4_1(yy) ),
             max16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             max8U( sel8x8_7(xx), sel8x8_7(yy) ),
             max8U( sel8x8_6(xx), sel8x8_6(yy) ),
             max8U( sel8x8_5(xx), sel8x8_5(yy) ),
             max8U( sel8x8_4(xx), sel8x8_4(yy) ),
             max8U( sel8x8_3(xx), sel8x8_3(yy) ),
             max8U( sel8x8_2(xx), sel8x8_2(yy) ),
             max8U( sel8x8_1(xx), sel8x8_1(yy) ),
             max8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             min16S( sel16x4_3(xx), sel16x4_3(yy) ),
             min16S( sel16x4_2(xx), sel16x4_2(yy) ),
             min16S( sel16x4_1(xx), sel16x4_1(yy) ),
             min16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             min8U( sel8x8_7(xx), sel8x8_7(yy) ),
             min8U( sel8x8_6(xx), sel8x8_6(yy) ),
             min8U( sel8x8_5(xx), sel8x8_5(yy) ),
             min8U( sel8x8_4(xx), sel8x8_4(yy) ),
             min8U( sel8x8_3(xx), sel8x8_3(yy) ),
             min8U( sel8x8_2(xx), sel8x8_2(yy) ),
             min8U( sel8x8_1(xx), sel8x8_1(yy) ),
             min8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */

/* Tuple/select functions for 16x2 vectors. */
static inline UInt mk16x2 ( UShort w1, UShort w2 ) {
   return (((UInt)w1) << 16) | ((UInt)w2);
}

static inline UShort sel16x2_1 ( UInt w32 ) {
   return 0xFFFF & (UShort)(w32 >> 16);
}
static inline UShort sel16x2_0 ( UInt w32 ) {
   return 0xFFFF & (UShort)(w32);
}

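/* Illustrative note (added commentary, not from the original sources):
   the 32-bit helpers use the same most-significant-lane-first packing
   as the 64-bit ones; in mk16x2 the first argument (w1) becomes the
   high half-word, so mk16x2(0xAAAA, 0xBBBB) == 0xAAAABBBB. */
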
static inline UInt mk8x4 ( UChar w3, UChar w2,
                           UChar w1, UChar w0 ) {
   UInt w32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
              | (((UInt)w1) << 8) | (((UInt)w0) << 0);
   return w32;
}

static inline UChar sel8x4_3 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 24));
}
static inline UChar sel8x4_2 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 16));
}
static inline UChar sel8x4_1 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 8));
}
static inline UChar sel8x4_0 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 0));
}


/* ----------------------------------------------------- */
/* More externally visible functions.  These simply
   implement the corresponding IR primops. */
/* ----------------------------------------------------- */

/* ------ 16x2 ------ */

UInt h_generic_calc_Add16x2 ( UInt xx, UInt yy )
{
   return mk16x2( sel16x2_1(xx) + sel16x2_1(yy),
                  sel16x2_0(xx) + sel16x2_0(yy) );
}

UInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy )
{
   return mk16x2( sel16x2_1(xx) - sel16x2_1(yy),
                  sel16x2_0(xx) - sel16x2_0(yy) );
}

UInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

/* ------ 8x4 ------ */

UInt h_generic_calc_Add8x4 ( UInt xx, UInt yy )
{
   return mk8x4(
             sel8x4_3(xx) + sel8x4_3(yy),
             sel8x4_2(xx) + sel8x4_2(yy),
             sel8x4_1(xx) + sel8x4_1(yy),
             sel8x4_0(xx) + sel8x4_0(yy)
          );
}

UInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy )
{
   return mk8x4(
             sel8x4_3(xx) - sel8x4_3(yy),
             sel8x4_2(xx) - sel8x4_2(yy),
             sel8x4_1(xx) - sel8x4_1(yy),
             sel8x4_0(xx) - sel8x4_0(yy)
          );
}

UInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
             hadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
             hadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
             hadd8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
             hadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
             hadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
             hadd8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
             hsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
             hsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
             hsub8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
             hsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
             hsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
             hsub8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
             qadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
             qadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
             qadd8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
             qadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
             qadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
             qadd8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
             qsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
             qsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
             qsub8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
             qsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
             qsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
             qsub8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_CmpNEZ16x2 ( UInt xx )
{
   return mk16x2(
             cmpnez16( sel16x2_1(xx) ),
             cmpnez16( sel16x2_0(xx) )
          );
}

UInt h_generic_calc_CmpNEZ8x4 ( UInt xx )
{
   return mk8x4(
             cmpnez8( sel8x4_3(xx) ),
             cmpnez8( sel8x4_2(xx) ),
             cmpnez8( sel8x4_1(xx) ),
             cmpnez8( sel8x4_0(xx) )
          );
}

/*---------------------------------------------------------------*/
/*--- end                                  host_generic_simd64.c ---*/
/*---------------------------------------------------------------*/