/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __TMMINTRIN_H
25#define __TMMINTRIN_H
26
27#include <pmmintrin.h>
28
/* Attribute set applied to every intrinsic defined in this header: always
 * inline, no debug info, and the "ssse3" target feature enabled per function.
 */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
31
32/// \brief Computes the absolute value of each of the packed 8-bit signed
33/// integers in the source operand and stores the 8-bit unsigned integer
34/// results in the destination.
35///
36/// \headerfile <x86intrin.h>
37///
38/// This intrinsic corresponds to the \c PABSB instruction.
39///
40/// \param __a
41/// A 64-bit vector of [8 x i8].
42/// \returns A 64-bit integer vector containing the absolute values of the
43/// elements in the operand.
44static __inline__ __m64 __DEFAULT_FN_ATTRS
45_mm_abs_pi8(__m64 __a)
46{
47 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
48}
49
50/// \brief Computes the absolute value of each of the packed 8-bit signed
51/// integers in the source operand and stores the 8-bit unsigned integer
52/// results in the destination.
53///
54/// \headerfile <x86intrin.h>
55///
56/// This intrinsic corresponds to the \c VPABSB instruction.
57///
58/// \param __a
59/// A 128-bit vector of [16 x i8].
60/// \returns A 128-bit integer vector containing the absolute values of the
61/// elements in the operand.
62static __inline__ __m128i __DEFAULT_FN_ATTRS
63_mm_abs_epi8(__m128i __a)
64{
65 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
66}
67
68/// \brief Computes the absolute value of each of the packed 16-bit signed
69/// integers in the source operand and stores the 16-bit unsigned integer
70/// results in the destination.
71///
72/// \headerfile <x86intrin.h>
73///
74/// This intrinsic corresponds to the \c PABSW instruction.
75///
76/// \param __a
77/// A 64-bit vector of [4 x i16].
78/// \returns A 64-bit integer vector containing the absolute values of the
79/// elements in the operand.
80static __inline__ __m64 __DEFAULT_FN_ATTRS
81_mm_abs_pi16(__m64 __a)
82{
83 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
84}
85
86/// \brief Computes the absolute value of each of the packed 16-bit signed
87/// integers in the source operand and stores the 16-bit unsigned integer
88/// results in the destination.
89///
90/// \headerfile <x86intrin.h>
91///
92/// This intrinsic corresponds to the \c VPABSW instruction.
93///
94/// \param __a
95/// A 128-bit vector of [8 x i16].
96/// \returns A 128-bit integer vector containing the absolute values of the
97/// elements in the operand.
98static __inline__ __m128i __DEFAULT_FN_ATTRS
99_mm_abs_epi16(__m128i __a)
100{
101 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
102}
103
104/// \brief Computes the absolute value of each of the packed 32-bit signed
105/// integers in the source operand and stores the 32-bit unsigned integer
106/// results in the destination.
107///
108/// \headerfile <x86intrin.h>
109///
110/// This intrinsic corresponds to the \c PABSD instruction.
111///
112/// \param __a
113/// A 64-bit vector of [2 x i32].
114/// \returns A 64-bit integer vector containing the absolute values of the
115/// elements in the operand.
116static __inline__ __m64 __DEFAULT_FN_ATTRS
117_mm_abs_pi32(__m64 __a)
118{
119 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
120}
121
122/// \brief Computes the absolute value of each of the packed 32-bit signed
123/// integers in the source operand and stores the 32-bit unsigned integer
124/// results in the destination.
125///
126/// \headerfile <x86intrin.h>
127///
128/// This intrinsic corresponds to the \c VPABSD instruction.
129///
130/// \param __a
131/// A 128-bit vector of [4 x i32].
132/// \returns A 128-bit integer vector containing the absolute values of the
133/// elements in the operand.
134static __inline__ __m128i __DEFAULT_FN_ATTRS
135_mm_abs_epi32(__m128i __a)
136{
137 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
138}
139
/// \brief Joins the two 128-bit integer vector operands into one value and
///    shifts that value right by the byte count given in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand giving the number of bytes by which the
///    concatenated value is shifted right.
/// \returns A 128-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_epi8(a, b, n) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                       (__v16qi)(__m128i)(b), \
                                       (n)); \
  })
163
/// \brief Joins the two 64-bit integer vector operands into one value and
///    shifts that value right by the byte count given in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand giving the number of bytes by which the
///    concatenated value is shifted right.
/// \returns A 64-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_pi8(a, b, n) \
  __extension__ ({ \
    (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), \
                                  (__v8qi)(__m64)(b), \
                                  (n)); \
  })
185
186/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
187/// 128-bit vectors of [8 x i16].
188///
189/// \headerfile <x86intrin.h>
190///
191/// This intrinsic corresponds to the \c VPHADDW instruction.
192///
193/// \param __a
194/// A 128-bit vector of [8 x i16] containing one of the source operands. The
195/// horizontal sums of the values are stored in the lower bits of the
196/// destination.
197/// \param __b
198/// A 128-bit vector of [8 x i16] containing one of the source operands. The
199/// horizontal sums of the values are stored in the upper bits of the
200/// destination.
201/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
202/// both operands.
203static __inline__ __m128i __DEFAULT_FN_ATTRS
204_mm_hadd_epi16(__m128i __a, __m128i __b)
205{
206 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
207}
208
209/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
210/// 128-bit vectors of [4 x i32].
211///
212/// \headerfile <x86intrin.h>
213///
214/// This intrinsic corresponds to the \c VPHADDD instruction.
215///
216/// \param __a
217/// A 128-bit vector of [4 x i32] containing one of the source operands. The
218/// horizontal sums of the values are stored in the lower bits of the
219/// destination.
220/// \param __b
221/// A 128-bit vector of [4 x i32] containing one of the source operands. The
222/// horizontal sums of the values are stored in the upper bits of the
223/// destination.
224/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
225/// both operands.
226static __inline__ __m128i __DEFAULT_FN_ATTRS
227_mm_hadd_epi32(__m128i __a, __m128i __b)
228{
229 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
230}
231
232/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
233/// 64-bit vectors of [4 x i16].
234///
235/// \headerfile <x86intrin.h>
236///
237/// This intrinsic corresponds to the \c PHADDW instruction.
238///
239/// \param __a
240/// A 64-bit vector of [4 x i16] containing one of the source operands. The
241/// horizontal sums of the values are stored in the lower bits of the
242/// destination.
243/// \param __b
244/// A 64-bit vector of [4 x i16] containing one of the source operands. The
245/// horizontal sums of the values are stored in the upper bits of the
246/// destination.
247/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
248/// operands.
249static __inline__ __m64 __DEFAULT_FN_ATTRS
250_mm_hadd_pi16(__m64 __a, __m64 __b)
251{
252 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
253}
254
255/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
256/// 64-bit vectors of [2 x i32].
257///
258/// \headerfile <x86intrin.h>
259///
260/// This intrinsic corresponds to the \c PHADDD instruction.
261///
262/// \param __a
263/// A 64-bit vector of [2 x i32] containing one of the source operands. The
264/// horizontal sums of the values are stored in the lower bits of the
265/// destination.
266/// \param __b
267/// A 64-bit vector of [2 x i32] containing one of the source operands. The
268/// horizontal sums of the values are stored in the upper bits of the
269/// destination.
270/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
271/// operands.
272static __inline__ __m64 __DEFAULT_FN_ATTRS
273_mm_hadd_pi32(__m64 __a, __m64 __b)
274{
275 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
276}
277
278/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
279/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
280/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
281///
282/// \headerfile <x86intrin.h>
283///
284/// This intrinsic corresponds to the \c VPHADDSW instruction.
285///
286/// \param __a
287/// A 128-bit vector of [8 x i16] containing one of the source operands. The
288/// horizontal sums of the values are stored in the lower bits of the
289/// destination.
290/// \param __b
291/// A 128-bit vector of [8 x i16] containing one of the source operands. The
292/// horizontal sums of the values are stored in the upper bits of the
293/// destination.
294/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
295/// sums of both operands.
296static __inline__ __m128i __DEFAULT_FN_ATTRS
297_mm_hadds_epi16(__m128i __a, __m128i __b)
298{
299 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
300}
301
302/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
303/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
304/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
305///
306/// \headerfile <x86intrin.h>
307///
308/// This intrinsic corresponds to the \c PHADDSW instruction.
309///
310/// \param __a
311/// A 64-bit vector of [4 x i16] containing one of the source operands. The
312/// horizontal sums of the values are stored in the lower bits of the
313/// destination.
314/// \param __b
315/// A 64-bit vector of [4 x i16] containing one of the source operands. The
316/// horizontal sums of the values are stored in the upper bits of the
317/// destination.
318/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319/// sums of both operands.
320static __inline__ __m64 __DEFAULT_FN_ATTRS
321_mm_hadds_pi16(__m64 __a, __m64 __b)
322{
323 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
324}
325
326/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
327/// packed 128-bit vectors of [8 x i16].
328///
329/// \headerfile <x86intrin.h>
330///
331/// This intrinsic corresponds to the \c VPHSUBW instruction.
332///
333/// \param __a
334/// A 128-bit vector of [8 x i16] containing one of the source operands. The
335/// horizontal differences between the values are stored in the lower bits of
336/// the destination.
337/// \param __b
338/// A 128-bit vector of [8 x i16] containing one of the source operands. The
339/// horizontal differences between the values are stored in the upper bits of
340/// the destination.
341/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342/// of both operands.
343static __inline__ __m128i __DEFAULT_FN_ATTRS
344_mm_hsub_epi16(__m128i __a, __m128i __b)
345{
346 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
347}
348
349/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
350/// packed 128-bit vectors of [4 x i32].
351///
352/// \headerfile <x86intrin.h>
353///
354/// This intrinsic corresponds to the \c VPHSUBD instruction.
355///
356/// \param __a
357/// A 128-bit vector of [4 x i32] containing one of the source operands. The
358/// horizontal differences between the values are stored in the lower bits of
359/// the destination.
360/// \param __b
361/// A 128-bit vector of [4 x i32] containing one of the source operands. The
362/// horizontal differences between the values are stored in the upper bits of
363/// the destination.
364/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365/// of both operands.
366static __inline__ __m128i __DEFAULT_FN_ATTRS
367_mm_hsub_epi32(__m128i __a, __m128i __b)
368{
369 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
370}
371
372/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
373/// packed 64-bit vectors of [4 x i16].
374///
375/// \headerfile <x86intrin.h>
376///
377/// This intrinsic corresponds to the \c PHSUBW instruction.
378///
379/// \param __a
380/// A 64-bit vector of [4 x i16] containing one of the source operands. The
381/// horizontal differences between the values are stored in the lower bits of
382/// the destination.
383/// \param __b
384/// A 64-bit vector of [4 x i16] containing one of the source operands. The
385/// horizontal differences between the values are stored in the upper bits of
386/// the destination.
387/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388/// of both operands.
389static __inline__ __m64 __DEFAULT_FN_ATTRS
390_mm_hsub_pi16(__m64 __a, __m64 __b)
391{
392 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
393}
394
395/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
396/// packed 64-bit vectors of [2 x i32].
397///
398/// \headerfile <x86intrin.h>
399///
400/// This intrinsic corresponds to the \c PHSUBD instruction.
401///
402/// \param __a
403/// A 64-bit vector of [2 x i32] containing one of the source operands. The
404/// horizontal differences between the values are stored in the lower bits of
405/// the destination.
406/// \param __b
407/// A 64-bit vector of [2 x i32] containing one of the source operands. The
408/// horizontal differences between the values are stored in the upper bits of
409/// the destination.
410/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411/// of both operands.
412static __inline__ __m64 __DEFAULT_FN_ATTRS
413_mm_hsub_pi32(__m64 __a, __m64 __b)
414{
415 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
416}
417
418/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
419/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
420/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
421/// saturated to 8000h.
422///
423/// \headerfile <x86intrin.h>
424///
425/// This intrinsic corresponds to the \c VPHSUBSW instruction.
426///
427/// \param __a
428/// A 128-bit vector of [8 x i16] containing one of the source operands. The
429/// horizontal differences between the values are stored in the lower bits of
430/// the destination.
431/// \param __b
432/// A 128-bit vector of [8 x i16] containing one of the source operands. The
433/// horizontal differences between the values are stored in the upper bits of
434/// the destination.
435/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
436/// differences of both operands.
437static __inline__ __m128i __DEFAULT_FN_ATTRS
438_mm_hsubs_epi16(__m128i __a, __m128i __b)
439{
440 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
441}
442
443/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
444/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
445/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
446/// saturated to 8000h.
447///
448/// \headerfile <x86intrin.h>
449///
450/// This intrinsic corresponds to the \c PHSUBSW instruction.
451///
452/// \param __a
453/// A 64-bit vector of [4 x i16] containing one of the source operands. The
454/// horizontal differences between the values are stored in the lower bits of
455/// the destination.
456/// \param __b
457/// A 64-bit vector of [4 x i16] containing one of the source operands. The
458/// horizontal differences between the values are stored in the upper bits of
459/// the destination.
460/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
461/// differences of both operands.
462static __inline__ __m64 __DEFAULT_FN_ATTRS
463_mm_hsubs_pi16(__m64 __a, __m64 __b)
464{
465 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
466}
467
468/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
469/// values contained in the first source operand and packed 8-bit signed
470/// integer values contained in the second source operand, adds pairs of
471/// contiguous products with signed saturation, and writes the 16-bit sums to
472/// the corresponding bits in the destination. For example, bits [7:0] of
473/// both operands are multiplied, bits [15:8] of both operands are
474/// multiplied, and the sum of both results is written to bits [15:0] of the
475/// destination.
476///
477/// \headerfile <x86intrin.h>
478///
479/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
480///
481/// \param __a
482/// A 128-bit integer vector containing the first source operand.
483/// \param __b
484/// A 128-bit integer vector containing the second source operand.
485/// \returns A 128-bit integer vector containing the sums of products of both
486/// operands:
487/// R0 := (__a0 * __b0) + (__a1 * __b1)
488/// R1 := (__a2 * __b2) + (__a3 * __b3)
489/// R2 := (__a4 * __b4) + (__a5 * __b5)
490/// R3 := (__a6 * __b6) + (__a7 * __b7)
491/// R4 := (__a8 * __b8) + (__a9 * __b9)
492/// R5 := (__a10 * __b10) + (__a11 * __b11)
493/// R6 := (__a12 * __b12) + (__a13 * __b13)
494/// R7 := (__a14 * __b14) + (__a15 * __b15)
495static __inline__ __m128i __DEFAULT_FN_ATTRS
496_mm_maddubs_epi16(__m128i __a, __m128i __b)
497{
498 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
499}
500
501/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
502/// values contained in the first source operand and packed 8-bit signed
503/// integer values contained in the second source operand, adds pairs of
504/// contiguous products with signed saturation, and writes the 16-bit sums to
505/// the corresponding bits in the destination. For example, bits [7:0] of
506/// both operands are multiplied, bits [15:8] of both operands are
507/// multiplied, and the sum of both results is written to bits [15:0] of the
508/// destination.
509///
510/// \headerfile <x86intrin.h>
511///
512/// This intrinsic corresponds to the \c PMADDUBSW instruction.
513///
514/// \param __a
515/// A 64-bit integer vector containing the first source operand.
516/// \param __b
517/// A 64-bit integer vector containing the second source operand.
518/// \returns A 64-bit integer vector containing the sums of products of both
519/// operands:
520/// R0 := (__a0 * __b0) + (__a1 * __b1)
521/// R1 := (__a2 * __b2) + (__a3 * __b3)
522/// R2 := (__a4 * __b4) + (__a5 * __b5)
523/// R3 := (__a6 * __b6) + (__a7 * __b7)
524static __inline__ __m64 __DEFAULT_FN_ATTRS
525_mm_maddubs_pi16(__m64 __a, __m64 __b)
526{
527 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
528}
529
530/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
531/// products to the 18 most significant bits by right-shifting, rounds the
532/// truncated value by adding 1, and writes bits [16:1] to the destination.
533///
534/// \headerfile <x86intrin.h>
535///
536/// This intrinsic corresponds to the \c VPMULHRSW instruction.
537///
538/// \param __a
539/// A 128-bit vector of [8 x i16] containing one of the source operands.
540/// \param __b
541/// A 128-bit vector of [8 x i16] containing one of the source operands.
542/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543/// products of both operands.
544static __inline__ __m128i __DEFAULT_FN_ATTRS
545_mm_mulhrs_epi16(__m128i __a, __m128i __b)
546{
547 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
548}
549
550/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
551/// products to the 18 most significant bits by right-shifting, rounds the
552/// truncated value by adding 1, and writes bits [16:1] to the destination.
553///
554/// \headerfile <x86intrin.h>
555///
556/// This intrinsic corresponds to the \c PMULHRSW instruction.
557///
558/// \param __a
559/// A 64-bit vector of [4 x i16] containing one of the source operands.
560/// \param __b
561/// A 64-bit vector of [4 x i16] containing one of the source operands.
562/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
563/// products of both operands.
564static __inline__ __m64 __DEFAULT_FN_ATTRS
565_mm_mulhrs_pi16(__m64 __a, __m64 __b)
566{
567 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
568}
569
570/// \brief Copies the 8-bit integers from a 128-bit integer vector to the
571/// destination or clears 8-bit values in the destination, as specified by
572/// the second source operand.
573///
574/// \headerfile <x86intrin.h>
575///
576/// This intrinsic corresponds to the \c VPSHUFB instruction.
577///
578/// \param __a
579/// A 128-bit integer vector containing the values to be copied.
580/// \param __b
581/// A 128-bit integer vector containing control bytes corresponding to
582/// positions in the destination:
583/// Bit 7:
584/// 1: Clear the corresponding byte in the destination.
585/// 0: Copy the selected source byte to the corresponding byte in the
586/// destination.
587/// Bits [6:4] Reserved.
588/// Bits [3:0] select the source byte to be copied.
589/// \returns A 128-bit integer vector containing the copied or cleared values.
590static __inline__ __m128i __DEFAULT_FN_ATTRS
591_mm_shuffle_epi8(__m128i __a, __m128i __b)
592{
593 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
594}
595
596/// \brief Copies the 8-bit integers from a 64-bit integer vector to the
597/// destination or clears 8-bit values in the destination, as specified by
598/// the second source operand.
599///
600/// \headerfile <x86intrin.h>
601///
602/// This intrinsic corresponds to the \c PSHUFB instruction.
603///
604/// \param __a
605/// A 64-bit integer vector containing the values to be copied.
606/// \param __b
607/// A 64-bit integer vector containing control bytes corresponding to
608/// positions in the destination:
609/// Bit 7:
610/// 1: Clear the corresponding byte in the destination.
611/// 0: Copy the selected source byte to the corresponding byte in the
612/// destination.
613/// Bits [3:0] select the source byte to be copied.
614/// \returns A 64-bit integer vector containing the copied or cleared values.
615static __inline__ __m64 __DEFAULT_FN_ATTRS
616_mm_shuffle_pi8(__m64 __a, __m64 __b)
617{
618 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
619}
620
621/// \brief For each 8-bit integer in the first source operand, perform one of
622/// the following actions as specified by the second source operand: If the
623/// byte in the second source is negative, calculate the two's complement of
624/// the corresponding byte in the first source, and write that value to the
625/// destination. If the byte in the second source is positive, copy the
626/// corresponding byte from the first source to the destination. If the byte
627/// in the second source is zero, clear the corresponding byte in the
628/// destination.
629///
630/// \headerfile <x86intrin.h>
631///
632/// This intrinsic corresponds to the \c VPSIGNB instruction.
633///
634/// \param __a
635/// A 128-bit integer vector containing the values to be copied.
636/// \param __b
637/// A 128-bit integer vector containing control bytes corresponding to
638/// positions in the destination.
639/// \returns A 128-bit integer vector containing the resultant values.
640static __inline__ __m128i __DEFAULT_FN_ATTRS
641_mm_sign_epi8(__m128i __a, __m128i __b)
642{
643 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
644}
645
646/// \brief For each 16-bit integer in the first source operand, perform one of
647/// the following actions as specified by the second source operand: If the
648/// word in the second source is negative, calculate the two's complement of
649/// the corresponding word in the first source, and write that value to the
650/// destination. If the word in the second source is positive, copy the
651/// corresponding word from the first source to the destination. If the word
652/// in the second source is zero, clear the corresponding word in the
653/// destination.
654///
655/// \headerfile <x86intrin.h>
656///
657/// This intrinsic corresponds to the \c VPSIGNW instruction.
658///
659/// \param __a
660/// A 128-bit integer vector containing the values to be copied.
661/// \param __b
662/// A 128-bit integer vector containing control words corresponding to
663/// positions in the destination.
664/// \returns A 128-bit integer vector containing the resultant values.
665static __inline__ __m128i __DEFAULT_FN_ATTRS
666_mm_sign_epi16(__m128i __a, __m128i __b)
667{
668 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
669}
670
671/// \brief For each 32-bit integer in the first source operand, perform one of
672/// the following actions as specified by the second source operand: If the
673/// doubleword in the second source is negative, calculate the two's
674/// complement of the corresponding word in the first source, and write that
675/// value to the destination. If the doubleword in the second source is
676/// positive, copy the corresponding word from the first source to the
677/// destination. If the doubleword in the second source is zero, clear the
678/// corresponding word in the destination.
679///
680/// \headerfile <x86intrin.h>
681///
682/// This intrinsic corresponds to the \c VPSIGND instruction.
683///
684/// \param __a
685/// A 128-bit integer vector containing the values to be copied.
686/// \param __b
687/// A 128-bit integer vector containing control doublewords corresponding to
688/// positions in the destination.
689/// \returns A 128-bit integer vector containing the resultant values.
690static __inline__ __m128i __DEFAULT_FN_ATTRS
691_mm_sign_epi32(__m128i __a, __m128i __b)
692{
693 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
694}
695
696/// \brief For each 8-bit integer in the first source operand, perform one of
697/// the following actions as specified by the second source operand: If the
698/// byte in the second source is negative, calculate the two's complement of
699/// the corresponding byte in the first source, and write that value to the
700/// destination. If the byte in the second source is positive, copy the
701/// corresponding byte from the first source to the destination. If the byte
702/// in the second source is zero, clear the corresponding byte in the
703/// destination.
704///
705/// \headerfile <x86intrin.h>
706///
707/// This intrinsic corresponds to the \c PSIGNB instruction.
708///
709/// \param __a
710/// A 64-bit integer vector containing the values to be copied.
711/// \param __b
712/// A 64-bit integer vector containing control bytes corresponding to
713/// positions in the destination.
714/// \returns A 64-bit integer vector containing the resultant values.
715static __inline__ __m64 __DEFAULT_FN_ATTRS
716_mm_sign_pi8(__m64 __a, __m64 __b)
717{
718 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
719}
720
721/// \brief For each 16-bit integer in the first source operand, perform one of
722/// the following actions as specified by the second source operand: If the
723/// word in the second source is negative, calculate the two's complement of
724/// the corresponding word in the first source, and write that value to the
725/// destination. If the word in the second source is positive, copy the
726/// corresponding word from the first source to the destination. If the word
727/// in the second source is zero, clear the corresponding word in the
728/// destination.
729///
730/// \headerfile <x86intrin.h>
731///
732/// This intrinsic corresponds to the \c PSIGNW instruction.
733///
734/// \param __a
735/// A 64-bit integer vector containing the values to be copied.
736/// \param __b
737/// A 64-bit integer vector containing control words corresponding to
738/// positions in the destination.
739/// \returns A 64-bit integer vector containing the resultant values.
740static __inline__ __m64 __DEFAULT_FN_ATTRS
741_mm_sign_pi16(__m64 __a, __m64 __b)
742{
743 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
744}
745
746/// \brief For each 32-bit integer in the first source operand, perform one of
747/// the following actions as specified by the second source operand: If the
748/// doubleword in the second source is negative, calculate the two's
749/// complement of the corresponding doubleword in the first source, and
750/// write that value to the destination. If the doubleword in the second
751/// source is positive, copy the corresponding doubleword from the first
752/// source to the destination. If the doubleword in the second source is
753/// zero, clear the corresponding doubleword in the destination.
754///
755/// \headerfile <x86intrin.h>
756///
757/// This intrinsic corresponds to the \c PSIGND instruction.
758///
759/// \param __a
760/// A 64-bit integer vector containing the values to be copied.
761/// \param __b
762/// A 64-bit integer vector containing two control doublewords corresponding
763/// to positions in the destination.
764/// \returns A 64-bit integer vector containing the resultant values.
765static __inline__ __m64 __DEFAULT_FN_ATTRS
766_mm_sign_pi32(__m64 __a, __m64 __b)
767{
768 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
769}
770
771#undef __DEFAULT_FN_ATTRS
772
773#endif /* __TMMINTRIN_H */