/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

Anders Carlssonf15e71d2008-12-24 01:45:22 +000024#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
Anders Carlssonf15e71d2008-12-24 01:45:22 +000027#include <xmmintrin.h>
28
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Type defines.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

/* Unsigned types */
typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));

Michael Kupersteina10dff92015-09-21 13:34:47 +000047#include <f16cintrin.h>
48
Eric Christopher4d1851682015-06-17 07:09:20 +000049/* Define the default attributes for the functions in this file. */
Michael Kupersteine45af542015-06-30 13:36:19 +000050#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
Eric Christopher4d1851682015-06-17 07:09:20 +000051
Ekaterina Romanova493091f2016-10-20 17:59:15 +000052/// \brief Adds lower double-precision values in both operands and returns the
53/// sum in the lower 64 bits of the result. The upper 64 bits of the result
54/// are copied from the upper double-precision value of the first operand.
55///
56/// \headerfile <x86intrin.h>
57///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +000058/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +000059///
60/// \param __a
61/// A 128-bit vector of [2 x double] containing one of the source operands.
62/// \param __b
63/// A 128-bit vector of [2 x double] containing one of the source operands.
64/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
65/// sum of the lower 64 bits of both operands. The upper 64 bits are copied
66/// from the upper 64 bits of the first source operand.
Michael Kupersteine45af542015-06-30 13:36:19 +000067static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000068_mm_add_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +000069{
David Blaikie3302f2b2013-01-16 23:08:36 +000070 __a[0] += __b[0];
71 return __a;
Anders Carlssonf15e71d2008-12-24 01:45:22 +000072}
73
Ekaterina Romanova493091f2016-10-20 17:59:15 +000074/// \brief Adds two 128-bit vectors of [2 x double].
75///
76/// \headerfile <x86intrin.h>
77///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +000078/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +000079///
80/// \param __a
81/// A 128-bit vector of [2 x double] containing one of the source operands.
82/// \param __b
83/// A 128-bit vector of [2 x double] containing one of the source operands.
84/// \returns A 128-bit vector of [2 x double] containing the sums of both
85/// operands.
Michael Kupersteine45af542015-06-30 13:36:19 +000086static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000087_mm_add_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +000088{
Craig Topper1aa231e2016-05-16 06:38:42 +000089 return (__m128d)((__v2df)__a + (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +000090}
91
Ekaterina Romanova493091f2016-10-20 17:59:15 +000092/// \brief Subtracts the lower double-precision value of the second operand
93/// from the lower double-precision value of the first operand and returns
94/// the difference in the lower 64 bits of the result. The upper 64 bits of
95/// the result are copied from the upper double-precision value of the first
96/// operand.
97///
98/// \headerfile <x86intrin.h>
99///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000100/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000101///
102/// \param __a
103/// A 128-bit vector of [2 x double] containing the minuend.
104/// \param __b
105/// A 128-bit vector of [2 x double] containing the subtrahend.
106/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
107/// difference of the lower 64 bits of both operands. The upper 64 bits are
108/// copied from the upper 64 bits of the first source operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000109static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000110_mm_sub_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000111{
David Blaikie3302f2b2013-01-16 23:08:36 +0000112 __a[0] -= __b[0];
113 return __a;
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000114}
115
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000116/// \brief Subtracts two 128-bit vectors of [2 x double].
117///
118/// \headerfile <x86intrin.h>
119///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000120/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000121///
122/// \param __a
123/// A 128-bit vector of [2 x double] containing the minuend.
124/// \param __b
125/// A 128-bit vector of [2 x double] containing the subtrahend.
126/// \returns A 128-bit vector of [2 x double] containing the differences between
127/// both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000128static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000129_mm_sub_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000130{
Craig Topper1aa231e2016-05-16 06:38:42 +0000131 return (__m128d)((__v2df)__a - (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000132}
133
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000134/// \brief Multiplies lower double-precision values in both operands and returns
135/// the product in the lower 64 bits of the result. The upper 64 bits of the
136/// result are copied from the upper double-precision value of the first
137/// operand.
138///
139/// \headerfile <x86intrin.h>
140///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000141/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000142///
143/// \param __a
144/// A 128-bit vector of [2 x double] containing one of the source operands.
145/// \param __b
146/// A 128-bit vector of [2 x double] containing one of the source operands.
147/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
148/// product of the lower 64 bits of both operands. The upper 64 bits are
149/// copied from the upper 64 bits of the first source operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000150static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000151_mm_mul_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000152{
David Blaikie3302f2b2013-01-16 23:08:36 +0000153 __a[0] *= __b[0];
154 return __a;
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000155}
156
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000157/// \brief Multiplies two 128-bit vectors of [2 x double].
158///
159/// \headerfile <x86intrin.h>
160///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000161/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000162///
163/// \param __a
164/// A 128-bit vector of [2 x double] containing one of the operands.
165/// \param __b
166/// A 128-bit vector of [2 x double] containing one of the operands.
167/// \returns A 128-bit vector of [2 x double] containing the products of both
168/// operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000169static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000170_mm_mul_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000171{
Craig Topper1aa231e2016-05-16 06:38:42 +0000172 return (__m128d)((__v2df)__a * (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000173}
174
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000175/// \brief Divides the lower double-precision value of the first operand by the
176/// lower double-precision value of the second operand and returns the
177/// quotient in the lower 64 bits of the result. The upper 64 bits of the
178/// result are copied from the upper double-precision value of the first
179/// operand.
180///
181/// \headerfile <x86intrin.h>
182///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000183/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000184///
185/// \param __a
186/// A 128-bit vector of [2 x double] containing the dividend.
187/// \param __b
188/// A 128-bit vector of [2 x double] containing divisor.
189/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
190/// quotient of the lower 64 bits of both operands. The upper 64 bits are
191/// copied from the upper 64 bits of the first source operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000192static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000193_mm_div_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000194{
David Blaikie3302f2b2013-01-16 23:08:36 +0000195 __a[0] /= __b[0];
196 return __a;
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000197}
198
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000199/// \brief Performs an element-by-element division of two 128-bit vectors of
200/// [2 x double].
201///
202/// \headerfile <x86intrin.h>
203///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000204/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000205///
206/// \param __a
207/// A 128-bit vector of [2 x double] containing the dividend.
208/// \param __b
209/// A 128-bit vector of [2 x double] containing the divisor.
210/// \returns A 128-bit vector of [2 x double] containing the quotients of both
211/// operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000212static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000213_mm_div_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000214{
Craig Topper1aa231e2016-05-16 06:38:42 +0000215 return (__m128d)((__v2df)__a / (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000216}
217
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000218/// \brief Calculates the square root of the lower double-precision value of
219/// the second operand and returns it in the lower 64 bits of the result.
220/// The upper 64 bits of the result are copied from the upper double-
221/// precision value of the first operand.
222///
223/// \headerfile <x86intrin.h>
224///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000225/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000226///
227/// \param __a
228/// A 128-bit vector of [2 x double] containing one of the operands. The
229/// upper 64 bits of this operand are copied to the upper 64 bits of the
230/// result.
231/// \param __b
232/// A 128-bit vector of [2 x double] containing one of the operands. The
233/// square root is calculated using the lower 64 bits of this operand.
234/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000235/// square root of the lower 64 bits of operand \a __b, and whose upper 64
236/// bits are copied from the upper 64 bits of operand \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000237static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000238_mm_sqrt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000239{
Craig Topper1aa231e2016-05-16 06:38:42 +0000240 __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
David Blaikie3302f2b2013-01-16 23:08:36 +0000241 return (__m128d) { __c[0], __a[1] };
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000242}
243
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000244/// \brief Calculates the square root of the each of two values stored in a
245/// 128-bit vector of [2 x double].
246///
247/// \headerfile <x86intrin.h>
248///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000249/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000250///
251/// \param __a
252/// A 128-bit vector of [2 x double].
253/// \returns A 128-bit vector of [2 x double] containing the square roots of the
254/// values in the operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000255static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000256_mm_sqrt_pd(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000257{
Craig Topper1aa231e2016-05-16 06:38:42 +0000258 return __builtin_ia32_sqrtpd((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000259}
260
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000261/// \brief Compares lower 64-bit double-precision values of both operands, and
262/// returns the lesser of the pair of values in the lower 64-bits of the
263/// result. The upper 64 bits of the result are copied from the upper double-
264/// precision value of the first operand.
265///
266/// \headerfile <x86intrin.h>
267///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000268/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000269///
270/// \param __a
271/// A 128-bit vector of [2 x double] containing one of the operands. The
272/// lower 64 bits of this operand are used in the comparison.
273/// \param __b
274/// A 128-bit vector of [2 x double] containing one of the operands. The
275/// lower 64 bits of this operand are used in the comparison.
276/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
277/// minimum value between both operands. The upper 64 bits are copied from
278/// the upper 64 bits of the first source operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000279static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000280_mm_min_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000281{
Craig Topper1aa231e2016-05-16 06:38:42 +0000282 return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000283}
284
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000285/// \brief Performs element-by-element comparison of the two 128-bit vectors of
286/// [2 x double] and returns the vector containing the lesser of each pair of
287/// values.
288///
289/// \headerfile <x86intrin.h>
290///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000291/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000292///
293/// \param __a
294/// A 128-bit vector of [2 x double] containing one of the operands.
295/// \param __b
296/// A 128-bit vector of [2 x double] containing one of the operands.
297/// \returns A 128-bit vector of [2 x double] containing the minimum values
298/// between both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000299static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000300_mm_min_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000301{
Craig Topper1aa231e2016-05-16 06:38:42 +0000302 return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000303}
304
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000305/// \brief Compares lower 64-bits double-precision values of both operands, and
306/// returns the greater of the pair of values in the lower 64-bits of the
307/// result. The upper 64 bits of the result are copied from the upper double-
308/// precision value of the first operand.
309///
310/// \headerfile <x86intrin.h>
311///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000312/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000313///
314/// \param __a
315/// A 128-bit vector of [2 x double] containing one of the operands. The
316/// lower 64 bits of this operand are used in the comparison.
317/// \param __b
318/// A 128-bit vector of [2 x double] containing one of the operands. The
319/// lower 64 bits of this operand are used in the comparison.
320/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
321/// maximum value between both operands. The upper 64 bits are copied from
322/// the upper 64 bits of the first source operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000323static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000324_mm_max_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000325{
Craig Topper1aa231e2016-05-16 06:38:42 +0000326 return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000327}
328
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000329/// \brief Performs element-by-element comparison of the two 128-bit vectors of
330/// [2 x double] and returns the vector containing the greater of each pair
331/// of values.
332///
333/// \headerfile <x86intrin.h>
334///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000335/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000336///
337/// \param __a
338/// A 128-bit vector of [2 x double] containing one of the operands.
339/// \param __b
340/// A 128-bit vector of [2 x double] containing one of the operands.
341/// \returns A 128-bit vector of [2 x double] containing the maximum values
342/// between both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000343static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000344_mm_max_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000345{
Craig Topper1aa231e2016-05-16 06:38:42 +0000346 return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000347}
348
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000349/// \brief Performs a bitwise AND of two 128-bit vectors of [2 x double].
350///
351/// \headerfile <x86intrin.h>
352///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000353/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000354///
355/// \param __a
356/// A 128-bit vector of [2 x double] containing one of the source operands.
357/// \param __b
358/// A 128-bit vector of [2 x double] containing one of the source operands.
359/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
360/// values between both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000361static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000362_mm_and_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000363{
Craig Topperd0681d52016-08-31 05:38:55 +0000364 return (__m128d)((__v2du)__a & (__v2du)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000365}
366
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000367/// \brief Performs a bitwise AND of two 128-bit vectors of [2 x double], using
368/// the one's complement of the values contained in the first source operand.
369///
370/// \headerfile <x86intrin.h>
371///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000372/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000373///
374/// \param __a
375/// A 128-bit vector of [2 x double] containing the left source operand. The
376/// one's complement of this value is used in the bitwise AND.
377/// \param __b
378/// A 128-bit vector of [2 x double] containing the right source operand.
379/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
380/// values in the second operand and the one's complement of the first
381/// operand.
Michael Kupersteine45af542015-06-30 13:36:19 +0000382static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000383_mm_andnot_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000384{
Craig Topperd0681d52016-08-31 05:38:55 +0000385 return (__m128d)(~(__v2du)__a & (__v2du)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000386}
387
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000388/// \brief Performs a bitwise OR of two 128-bit vectors of [2 x double].
389///
390/// \headerfile <x86intrin.h>
391///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000392/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000393///
394/// \param __a
395/// A 128-bit vector of [2 x double] containing one of the source operands.
396/// \param __b
397/// A 128-bit vector of [2 x double] containing one of the source operands.
398/// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the
399/// values between both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000400static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000401_mm_or_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000402{
Craig Topperd0681d52016-08-31 05:38:55 +0000403 return (__m128d)((__v2du)__a | (__v2du)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000404}
405
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000406/// \brief Performs a bitwise XOR of two 128-bit vectors of [2 x double].
407///
408/// \headerfile <x86intrin.h>
409///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000410/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000411///
412/// \param __a
413/// A 128-bit vector of [2 x double] containing one of the source operands.
414/// \param __b
415/// A 128-bit vector of [2 x double] containing one of the source operands.
416/// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the
417/// values between both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +0000418static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000419_mm_xor_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000420{
Craig Topperd0681d52016-08-31 05:38:55 +0000421 return (__m128d)((__v2du)__a ^ (__v2du)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000422}
423
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000424/// \brief Compares each of the corresponding double-precision values of the
425/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0h
426/// for false, FFFFFFFFFFFFFFFFh for true.
427///
428/// \headerfile <x86intrin.h>
429///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000430/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000431///
432/// \param __a
433/// A 128-bit vector of [2 x double].
434/// \param __b
435/// A 128-bit vector of [2 x double].
436/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000437static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000438_mm_cmpeq_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000439{
Craig Topper1aa231e2016-05-16 06:38:42 +0000440 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000441}
442
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000443/// \brief Compares each of the corresponding double-precision values of the
444/// 128-bit vectors of [2 x double] to determine if the values in the first
445/// operand are less than those in the second operand. Each comparison
446/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
447///
448/// \headerfile <x86intrin.h>
449///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000450/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000451///
452/// \param __a
453/// A 128-bit vector of [2 x double].
454/// \param __b
455/// A 128-bit vector of [2 x double].
456/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000457static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000458_mm_cmplt_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000459{
Craig Topper1aa231e2016-05-16 06:38:42 +0000460 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000461}
462
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000463/// \brief Compares each of the corresponding double-precision values of the
464/// 128-bit vectors of [2 x double] to determine if the values in the first
465/// operand are less than or equal to those in the second operand. Each
466/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
467///
468/// \headerfile <x86intrin.h>
469///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000470/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000471///
472/// \param __a
473/// A 128-bit vector of [2 x double].
474/// \param __b
475/// A 128-bit vector of [2 x double].
476/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000477static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000478_mm_cmple_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000479{
Craig Topper1aa231e2016-05-16 06:38:42 +0000480 return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000481}
482
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000483/// \brief Compares each of the corresponding double-precision values of the
484/// 128-bit vectors of [2 x double] to determine if the values in the first
485/// operand are greater than those in the second operand. Each comparison
486/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
487///
488/// \headerfile <x86intrin.h>
489///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000490/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000491///
492/// \param __a
493/// A 128-bit vector of [2 x double].
494/// \param __b
495/// A 128-bit vector of [2 x double].
496/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000497static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000498_mm_cmpgt_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000499{
Craig Topper1aa231e2016-05-16 06:38:42 +0000500 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000501}
502
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000503/// \brief Compares each of the corresponding double-precision values of the
504/// 128-bit vectors of [2 x double] to determine if the values in the first
505/// operand are greater than or equal to those in the second operand. Each
506/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
507///
508/// \headerfile <x86intrin.h>
509///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000510/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000511///
512/// \param __a
513/// A 128-bit vector of [2 x double].
514/// \param __b
515/// A 128-bit vector of [2 x double].
516/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000517static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000518_mm_cmpge_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000519{
Craig Topper1aa231e2016-05-16 06:38:42 +0000520 return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000521}
522
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000523/// \brief Compares each of the corresponding double-precision values of the
524/// 128-bit vectors of [2 x double] to determine if the values in the first
525/// operand are ordered with respect to those in the second operand. A pair
526/// of double-precision values are "ordered" with respect to each other if
527/// neither value is a NaN. Each comparison yields 0h for false,
528/// FFFFFFFFFFFFFFFFh for true.
529///
530/// \headerfile <x86intrin.h>
531///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000532/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000533///
534/// \param __a
535/// A 128-bit vector of [2 x double].
536/// \param __b
537/// A 128-bit vector of [2 x double].
538/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000539static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000540_mm_cmpord_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000541{
Craig Topper1aa231e2016-05-16 06:38:42 +0000542 return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000543}
544
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000545/// \brief Compares each of the corresponding double-precision values of the
546/// 128-bit vectors of [2 x double] to determine if the values in the first
547/// operand are unordered with respect to those in the second operand. A pair
548/// of double-precision values are "unordered" with respect to each other if
549/// one or both values are NaN. Each comparison yields 0h for false,
550/// FFFFFFFFFFFFFFFFh for true.
551///
552/// \headerfile <x86intrin.h>
553///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000554/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
555/// instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000556///
557/// \param __a
558/// A 128-bit vector of [2 x double].
559/// \param __b
560/// A 128-bit vector of [2 x double].
561/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000562static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000563_mm_cmpunord_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000564{
Craig Topper1aa231e2016-05-16 06:38:42 +0000565 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000566}
567
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000568/// \brief Compares each of the corresponding double-precision values of the
569/// 128-bit vectors of [2 x double] to determine if the values in the first
570/// operand are unequal to those in the second operand. Each comparison
571/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
572///
573/// \headerfile <x86intrin.h>
574///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000575/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000576///
577/// \param __a
578/// A 128-bit vector of [2 x double].
579/// \param __b
580/// A 128-bit vector of [2 x double].
581/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000582static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000583_mm_cmpneq_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000584{
Craig Topper1aa231e2016-05-16 06:38:42 +0000585 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000586}
587
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000588/// \brief Compares each of the corresponding double-precision values of the
589/// 128-bit vectors of [2 x double] to determine if the values in the first
590/// operand are not less than those in the second operand. Each comparison
591/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
592///
593/// \headerfile <x86intrin.h>
594///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000595/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000596///
597/// \param __a
598/// A 128-bit vector of [2 x double].
599/// \param __b
600/// A 128-bit vector of [2 x double].
601/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000602static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000603_mm_cmpnlt_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000604{
Craig Topper1aa231e2016-05-16 06:38:42 +0000605 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000606}
607
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000608/// \brief Compares each of the corresponding double-precision values of the
609/// 128-bit vectors of [2 x double] to determine if the values in the first
610/// operand are not less than or equal to those in the second operand. Each
611/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
612///
613/// \headerfile <x86intrin.h>
614///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000615/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000616///
617/// \param __a
618/// A 128-bit vector of [2 x double].
619/// \param __b
620/// A 128-bit vector of [2 x double].
621/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000622static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000623_mm_cmpnle_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000624{
Craig Topper1aa231e2016-05-16 06:38:42 +0000625 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000626}
627
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000628/// \brief Compares each of the corresponding double-precision values of the
629/// 128-bit vectors of [2 x double] to determine if the values in the first
630/// operand are not greater than those in the second operand. Each
631/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
632///
633/// \headerfile <x86intrin.h>
634///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000635/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000636///
637/// \param __a
638/// A 128-bit vector of [2 x double].
639/// \param __b
640/// A 128-bit vector of [2 x double].
641/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000642static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000643_mm_cmpngt_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000644{
Craig Topper1aa231e2016-05-16 06:38:42 +0000645 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000646}
647
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000648/// \brief Compares each of the corresponding double-precision values of the
649/// 128-bit vectors of [2 x double] to determine if the values in the first
650/// operand are not greater than or equal to those in the second operand.
651/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
652///
653/// \headerfile <x86intrin.h>
654///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000655/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000656///
657/// \param __a
658/// A 128-bit vector of [2 x double].
659/// \param __b
660/// A 128-bit vector of [2 x double].
661/// \returns A 128-bit vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000662static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000663_mm_cmpnge_pd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000664{
Craig Topper1aa231e2016-05-16 06:38:42 +0000665 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000666}
667
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000668/// \brief Compares the lower double-precision floating-point values in each of
669/// the two 128-bit floating-point vectors of [2 x double] for equality. The
670/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
671///
672/// \headerfile <x86intrin.h>
673///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000674/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000675///
676/// \param __a
677/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000678/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000679/// \param __b
680/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000681/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000682/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000683/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000684static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000685_mm_cmpeq_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000686{
Craig Topper1aa231e2016-05-16 06:38:42 +0000687 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000688}
689
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000690/// \brief Compares the lower double-precision floating-point values in each of
691/// the two 128-bit floating-point vectors of [2 x double] to determine if
692/// the value in the first parameter is less than the corresponding value in
693/// the second parameter. The comparison yields 0h for false,
694/// FFFFFFFFFFFFFFFFh for true.
695///
696/// \headerfile <x86intrin.h>
697///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000698/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000699///
700/// \param __a
701/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000702/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000703/// \param __b
704/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000705/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000706/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000707/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000708static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000709_mm_cmplt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000710{
Craig Topper1aa231e2016-05-16 06:38:42 +0000711 return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000712}
713
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000714/// \brief Compares the lower double-precision floating-point values in each of
715/// the two 128-bit floating-point vectors of [2 x double] to determine if
716/// the value in the first parameter is less than or equal to the
717/// corresponding value in the second parameter. The comparison yields 0h for
718/// false, FFFFFFFFFFFFFFFFh for true.
719///
720/// \headerfile <x86intrin.h>
721///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000722/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000723///
724/// \param __a
725/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000726/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000727/// \param __b
728/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000729/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000730/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000731/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000732static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000733_mm_cmple_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000734{
Craig Topper1aa231e2016-05-16 06:38:42 +0000735 return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000736}
737
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000738/// \brief Compares the lower double-precision floating-point values in each of
739/// the two 128-bit floating-point vectors of [2 x double] to determine if
740/// the value in the first parameter is greater than the corresponding value
741/// in the second parameter. The comparison yields 0h for false,
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000742/// FFFFFFFFFFFFFFFFh for true.
743///
744/// \headerfile <x86intrin.h>
745///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000746/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000747///
748/// \param __a
749/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000750/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000751/// \param __b
752/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000753/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000754/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000755/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000756static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000757_mm_cmpgt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000758{
Craig Topper1aa231e2016-05-16 06:38:42 +0000759 __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
Manman Ren9bb34d62013-06-17 19:42:49 +0000760 return (__m128d) { __c[0], __a[1] };
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000761}
762
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000763/// \brief Compares the lower double-precision floating-point values in each of
764/// the two 128-bit floating-point vectors of [2 x double] to determine if
765/// the value in the first parameter is greater than or equal to the
766/// corresponding value in the second parameter. The comparison yields 0h for
767/// false, FFFFFFFFFFFFFFFFh for true.
768///
769/// \headerfile <x86intrin.h>
770///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000771/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000772///
773/// \param __a
774/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000775/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000776/// \param __b
777/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000778/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000779/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000780/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000781static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000782_mm_cmpge_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000783{
Craig Topper1aa231e2016-05-16 06:38:42 +0000784 __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
Manman Ren9bb34d62013-06-17 19:42:49 +0000785 return (__m128d) { __c[0], __a[1] };
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000786}
787
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000788/// \brief Compares the lower double-precision floating-point values in each of
789/// the two 128-bit floating-point vectors of [2 x double] to determine if
790/// the value in the first parameter is "ordered" with respect to the
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000791/// corresponding value in the second parameter. The comparison yields 0h for
792/// false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are
793/// "ordered" with respect to each other if neither value is a NaN.
794///
795/// \headerfile <x86intrin.h>
796///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000797/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000798///
799/// \param __a
800/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000801/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000802/// \param __b
803/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000804/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000805/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000806/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000807static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000808_mm_cmpord_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000809{
Craig Topper1aa231e2016-05-16 06:38:42 +0000810 return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000811}
812
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000813/// \brief Compares the lower double-precision floating-point values in each of
814/// the two 128-bit floating-point vectors of [2 x double] to determine if
815/// the value in the first parameter is "unordered" with respect to the
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000816/// corresponding value in the second parameter. The comparison yields 0h
817/// for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
818/// are "unordered" with respect to each other if one or both values are NaN.
819///
820/// \headerfile <x86intrin.h>
821///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000822/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
823/// instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000824///
825/// \param __a
826/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000827/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000828/// \param __b
829/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000830/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000831/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000832/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000833static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000834_mm_cmpunord_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000835{
Craig Topper1aa231e2016-05-16 06:38:42 +0000836 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000837}
838
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000839/// \brief Compares the lower double-precision floating-point values in each of
840/// the two 128-bit floating-point vectors of [2 x double] to determine if
841/// the value in the first parameter is unequal to the corresponding value in
842/// the second parameter. The comparison yields 0h for false,
843/// FFFFFFFFFFFFFFFFh for true.
844///
845/// \headerfile <x86intrin.h>
846///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000847/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000848///
849/// \param __a
850/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000851/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000852/// \param __b
853/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000854/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000855/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000856/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000857static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000858_mm_cmpneq_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000859{
Craig Topper1aa231e2016-05-16 06:38:42 +0000860 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000861}
862
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000863/// \brief Compares the lower double-precision floating-point values in each of
864/// the two 128-bit floating-point vectors of [2 x double] to determine if
865/// the value in the first parameter is not less than the corresponding
866/// value in the second parameter. The comparison yields 0h for false,
867/// FFFFFFFFFFFFFFFFh for true.
868///
869/// \headerfile <x86intrin.h>
870///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000871/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000872///
873/// \param __a
874/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000875/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000876/// \param __b
877/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000878/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000879/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000880/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000881static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000882_mm_cmpnlt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000883{
Craig Topper1aa231e2016-05-16 06:38:42 +0000884 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000885}
886
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000887/// \brief Compares the lower double-precision floating-point values in each of
888/// the two 128-bit floating-point vectors of [2 x double] to determine if
889/// the value in the first parameter is not less than or equal to the
890/// corresponding value in the second parameter. The comparison yields 0h
891/// for false, FFFFFFFFFFFFFFFFh for true.
892///
893/// \headerfile <x86intrin.h>
894///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000895/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000896///
897/// \param __a
898/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000899/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000900/// \param __b
901/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000902/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000903/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000904/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000905static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000906_mm_cmpnle_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000907{
Craig Topper1aa231e2016-05-16 06:38:42 +0000908 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000909}
910
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000911/// \brief Compares the lower double-precision floating-point values in each of
912/// the two 128-bit floating-point vectors of [2 x double] to determine if
913/// the value in the first parameter is not greater than the corresponding
914/// value in the second parameter. The comparison yields 0h for false,
915/// FFFFFFFFFFFFFFFFh for true.
916///
917/// \headerfile <x86intrin.h>
918///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000919/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000920///
921/// \param __a
922/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000923/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000924/// \param __b
925/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000926/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000927/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000928/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000929static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000930_mm_cmpngt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000931{
Craig Topper1aa231e2016-05-16 06:38:42 +0000932 __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
Manman Ren9bb34d62013-06-17 19:42:49 +0000933 return (__m128d) { __c[0], __a[1] };
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000934}
935
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000936/// \brief Compares the lower double-precision floating-point values in each of
937/// the two 128-bit floating-point vectors of [2 x double] to determine if
938/// the value in the first parameter is not greater than or equal to the
939/// corresponding value in the second parameter. The comparison yields 0h
940/// for false, FFFFFFFFFFFFFFFFh for true.
941///
942/// \headerfile <x86intrin.h>
943///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000944/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000945///
946/// \param __a
947/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000948/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000949/// \param __b
950/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000951/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000952/// \returns A 128-bit vector. The lower 64 bits contains the comparison
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000953/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +0000954static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000955_mm_cmpnge_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000956{
Craig Topper1aa231e2016-05-16 06:38:42 +0000957 __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
Manman Ren9bb34d62013-06-17 19:42:49 +0000958 return (__m128d) { __c[0], __a[1] };
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000959}
960
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000961/// \brief Compares the lower double-precision floating-point values in each of
962/// the two 128-bit floating-point vectors of [2 x double] for equality. The
963/// comparison yields 0 for false, 1 for true.
964///
965/// \headerfile <x86intrin.h>
966///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000967/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000968///
969/// \param __a
970/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000971/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000972/// \param __b
973/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000974/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000975/// \returns An integer containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +0000976static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000977_mm_comieq_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000978{
Craig Topper1aa231e2016-05-16 06:38:42 +0000979 return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +0000980}
981
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000982/// \brief Compares the lower double-precision floating-point values in each of
983/// the two 128-bit floating-point vectors of [2 x double] to determine if
984/// the value in the first parameter is less than the corresponding value in
985/// the second parameter. The comparison yields 0 for false, 1 for true.
986///
987/// \headerfile <x86intrin.h>
988///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000989/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000990///
991/// \param __a
992/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000993/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000994/// \param __b
995/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +0000996/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +0000997/// \returns 1 if the lower double-precision value of \a __a is less than that
/// of \a __b, otherwise 0. If either of the two lower double-precision values
/// is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +0000998static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000999_mm_comilt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001000{
Craig Topper1aa231e2016-05-16 06:38:42 +00001001 return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001002}
1003
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001004/// \brief Compares the lower double-precision floating-point values in each of
1005/// the two 128-bit floating-point vectors of [2 x double] to determine if
1006/// the value in the first parameter is less than or equal to the
1007/// corresponding value in the second parameter. The comparison yields 0 for
1008/// false, 1 for true.
1009///
1010/// \headerfile <x86intrin.h>
1011///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001012/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001013///
1014/// \param __a
1015/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001016/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001017/// \param __b
1018/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001019/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001020/// \returns 1 if the lower double-precision value of \a __a is less than or
/// equal to that of \a __b, otherwise 0. If either of the two lower
/// double-precision values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001021static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001022_mm_comile_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001023{
Craig Topper1aa231e2016-05-16 06:38:42 +00001024 return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001025}
1026
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001027/// \brief Compares the lower double-precision floating-point values in each of
1028/// the two 128-bit floating-point vectors of [2 x double] to determine if
1029/// the value in the first parameter is greater than the corresponding value
1030/// in the second parameter. The comparison yields 0 for false, 1 for true.
1031///
1032/// \headerfile <x86intrin.h>
1033///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001034/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001035///
1036/// \param __a
1037/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001038/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001039/// \param __b
1040/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001041/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001042/// \returns 1 if the lower double-precision value of \a __a is greater than
/// that of \a __b, otherwise 0. If either of the two lower double-precision
/// values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001043static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001044_mm_comigt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001045{
Craig Topper1aa231e2016-05-16 06:38:42 +00001046 return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001047}
1048
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001049/// \brief Compares the lower double-precision floating-point values in each of
1050/// the two 128-bit floating-point vectors of [2 x double] to determine if
1051/// the value in the first parameter is greater than or equal to the
1052/// corresponding value in the second parameter. The comparison yields 0 for
1053/// false, 1 for true.
1054///
1055/// \headerfile <x86intrin.h>
1056///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001057/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001058///
1059/// \param __a
1060/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001061/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001062/// \param __b
1063/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001064/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001065/// \returns 1 if the lower double-precision value of \a __a is greater than or
/// equal to that of \a __b, otherwise 0. If either of the two lower
/// double-precision values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001066static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001067_mm_comige_sd(__m128d __a, __m128d __b)
Eli Friedman89c11332011-10-06 20:31:50 +00001068{
Craig Topper1aa231e2016-05-16 06:38:42 +00001069 return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
Eli Friedman89c11332011-10-06 20:31:50 +00001070}
1071
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001072/// \brief Compares the lower double-precision floating-point values in each of
1073/// the two 128-bit floating-point vectors of [2 x double] to determine if
1074/// the value in the first parameter is unequal to the corresponding value in
1075/// the second parameter. The comparison yields 0 for false, 1 for true.
1076///
1077/// \headerfile <x86intrin.h>
1078///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001079/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001080///
1081/// \param __a
1082/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001083/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001084/// \param __b
1085/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001086/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001087/// \returns 1 if the lower double-precision value of \a __a is not equal to
/// that of \a __b, otherwise 0. If either of the two lower double-precision
/// values is NaN, 1 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001088static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001089_mm_comineq_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001090{
Craig Topper1aa231e2016-05-16 06:38:42 +00001091 return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001092}
1093
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001094/// \brief Compares the lower double-precision floating-point values in each of
1095/// the two 128-bit floating-point vectors of [2 x double] for equality. The
1096/// comparison yields 0 for false, 1 for true. If either of the two lower
1097/// double-precision values is NaN, 0 is returned.
1098///
1099/// \headerfile <x86intrin.h>
1100///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001101/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001102///
1103/// \param __a
1104/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001105/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001106/// \param __b
1107/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001108/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001109/// \returns An integer containing the comparison results. If either of the two
1110/// lower double-precision values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001111static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001112_mm_ucomieq_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001113{
Craig Topper1aa231e2016-05-16 06:38:42 +00001114 return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001115}
1116
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001117/// \brief Compares the lower double-precision floating-point values in each of
1118/// the two 128-bit floating-point vectors of [2 x double] to determine if
1119/// the value in the first parameter is less than the corresponding value in
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00001120/// the second parameter. The comparison yields 0 for false, 1 for true. If
1121/// either of the two lower double-precision values is NaN, 0 is returned.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001122///
1123/// \headerfile <x86intrin.h>
1124///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001125/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001126///
1127/// \param __a
1128/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001129/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001130/// \param __b
1131/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001132/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001133/// \returns An integer containing the comparison results. If either of the two
1134/// lower double-precision values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001135static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001136_mm_ucomilt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001137{
Craig Topper1aa231e2016-05-16 06:38:42 +00001138 return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001139}
1140
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001141/// \brief Compares the lower double-precision floating-point values in each of
1142/// the two 128-bit floating-point vectors of [2 x double] to determine if
1143/// the value in the first parameter is less than or equal to the
1144/// corresponding value in the second parameter. The comparison yields 0 for
1145/// false, 1 for true. If either of the two lower double-precision values is
1146/// NaN, 0 is returned.
1147///
1148/// \headerfile <x86intrin.h>
1149///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001150/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001151///
1152/// \param __a
1153/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001154/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001155/// \param __b
1156/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001157/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001158/// \returns An integer containing the comparison results. If either of the two
1159/// lower double-precision values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001160static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001161_mm_ucomile_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001162{
Craig Topper1aa231e2016-05-16 06:38:42 +00001163 return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001164}
1165
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001166/// \brief Compares the lower double-precision floating-point values in each of
1167/// the two 128-bit floating-point vectors of [2 x double] to determine if
1168/// the value in the first parameter is greater than the corresponding value
1169/// in the second parameter. The comparison yields 0 for false, 1 for true.
1170/// If either of the two lower double-precision values is NaN, 0 is returned.
1171///
1172/// \headerfile <x86intrin.h>
1173///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001174/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001175///
1176/// \param __a
1177/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001178/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001179/// \param __b
1180/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001181/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001182/// \returns 1 if the lower double-precision value of \a __a is greater than
1183/// that of \a __b, otherwise 0. If either of the two lower double-precision
/// values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001184static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001185_mm_ucomigt_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001186{
Craig Topper1aa231e2016-05-16 06:38:42 +00001187 return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001188}
1189
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001190/// \brief Compares the lower double-precision floating-point values in each of
1191/// the two 128-bit floating-point vectors of [2 x double] to determine if
1192/// the value in the first parameter is greater than or equal to the
1193/// corresponding value in the second parameter. The comparison yields 0 for
1194/// false, 1 for true. If either of the two lower double-precision values
1195/// is NaN, 0 is returned.
1196///
1197/// \headerfile <x86intrin.h>
1198///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001199/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001200///
1201/// \param __a
1202/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001203/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001204/// \param __b
1205/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001206/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova493091f2016-10-20 17:59:15 +00001207/// \returns 1 if the lower double-precision value of \a __a is greater than or
1208/// equal to that of \a __b, otherwise 0. If either of the two lower
/// double-precision values is NaN, 0 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001209static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001210_mm_ucomige_sd(__m128d __a, __m128d __b)
Eli Friedmanf8cb4802011-08-29 21:26:24 +00001211{
Craig Topper1aa231e2016-05-16 06:38:42 +00001212 return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
Eli Friedmanf8cb4802011-08-29 21:26:24 +00001213}
1214
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001215/// \brief Compares the lower double-precision floating-point values in each of
1216/// the two 128-bit floating-point vectors of [2 x double] to determine if
1217/// the value in the first parameter is unequal to the corresponding value in
1218/// the second parameter. The comparison yields 0 for false, 1 for true. If
1219/// either of the two lower double-precision values is NaN, 1 is returned.
1220///
1221/// \headerfile <x86intrin.h>
1222///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001223/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001224///
1225/// \param __a
1226/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001227/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001228/// \param __b
1229/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001230/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001231/// \returns An integer containing the comparison result. If either of the two
1232/// lower double-precision values is NaN, 1 is returned.
Michael Kupersteine45af542015-06-30 13:36:19 +00001233static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001234_mm_ucomineq_sd(__m128d __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001235{
Craig Topper1aa231e2016-05-16 06:38:42 +00001236 return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001237}
1238
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001239/// \brief Converts the two double-precision floating-point elements of a
1240/// 128-bit vector of [2 x double] into two single-precision floating-point
1241/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].
1242/// The upper 64 bits of the result vector are set to zero.
1243///
1244/// \headerfile <x86intrin.h>
1245///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001246/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001247///
1248/// \param __a
1249/// A 128-bit vector of [2 x double].
1250/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1251/// converted values. The upper 64 bits are set to zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00001252static __inline__ __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001253_mm_cvtpd_ps(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001254{
 /* __a is handed to the builtin as __v2df; the builtin's value is returned
    without an explicit cast to __m128. */
Craig Topper1aa231e2016-05-16 06:38:42 +00001255 return __builtin_ia32_cvtpd2ps((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001256}
1257
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001258/// \brief Converts the lower two single-precision floating-point elements of a
1259/// 128-bit vector of [4 x float] into two double-precision floating-point
1260/// values, returned in a 128-bit vector of [2 x double]. The upper two
1261/// elements of the input vector are unused.
1262///
1263/// \headerfile <x86intrin.h>
1264///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001265/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001266///
1267/// \param __a
1268/// A 128-bit vector of [4 x float]. The lower two single-precision
1269/// floating-point elements are converted to double-precision values. The
1270/// upper two elements are unused.
1271/// \returns A 128-bit vector of [2 x double] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001272static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001273_mm_cvtps_pd(__m128 __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001274{
 /* The shuffle (indices 0, 1) keeps only the two low elements of __a viewed
    as __v4sf; __builtin_convertvector then converts that two-element vector
    element-wise to __v2df. */
Simon Pilgrim90770c72016-05-23 22:13:02 +00001275 return (__m128d) __builtin_convertvector(
1276 __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001277}
1278
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001279/// \brief Converts the lower two integer elements of a 128-bit vector of
1280/// [4 x i32] into two double-precision floating-point values, returned in a
1281/// 128-bit vector of [2 x double]. The upper two elements of the input
1282/// vector are unused.
1283///
1284/// \headerfile <x86intrin.h>
1285///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001286/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001287///
1288/// \param __a
1289/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are
1290/// converted to double-precision values. The upper two elements are unused.
1291/// \returns A 128-bit vector of [2 x double] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001292static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001293_mm_cvtepi32_pd(__m128i __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001294{
 /* The shuffle (indices 0, 1) keeps only the two low elements of __a viewed
    as __v4si; __builtin_convertvector then converts that two-element vector
    element-wise to __v2df. */
Simon Pilgrim90770c72016-05-23 22:13:02 +00001295 return (__m128d) __builtin_convertvector(
1296 __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001297}
1298
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001299/// \brief Converts the two double-precision floating-point elements of a
1300/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
1301/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper
1302/// 64 bits of the result vector are set to zero.
1303///
1304/// \headerfile <x86intrin.h>
1305///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001306/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001307///
1308/// \param __a
1309/// A 128-bit vector of [2 x double].
1310/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1311/// converted values. The upper 64 bits are set to zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00001312static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001313_mm_cvtpd_epi32(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001314{
 /* NOTE(review): the builtin's result is returned without an explicit
    (__m128i) cast, unlike _mm_cvttpd_epi32, which casts its builtin's result;
    presumably this builtin's return type converts implicitly -- confirm. */
Craig Topper1aa231e2016-05-16 06:38:42 +00001315 return __builtin_ia32_cvtpd2dq((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001316}
1317
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001318/// \brief Converts the low-order element of a 128-bit vector of [2 x double]
1319/// into a 32-bit signed integer value.
1320///
1321/// \headerfile <x86intrin.h>
1322///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001323/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001324///
1325/// \param __a
1326/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1327/// conversion.
1328/// \returns A 32-bit signed integer containing the converted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001329static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001330_mm_cvtsd_si32(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001331{
 /* Only the cast to __v2df happens here; the conversion to int is done
    entirely by the builtin, whose value is returned as-is. */
Craig Topper1aa231e2016-05-16 06:38:42 +00001332 return __builtin_ia32_cvtsd2si((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001333}
1334
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001335/// \brief Converts the lower double-precision floating-point element of a
1336/// 128-bit vector of [2 x double], in the second parameter, into a
1337/// single-precision floating-point value, returned in the lower 32 bits of a
1338/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are
1339/// copied from the upper 96 bits of the first parameter.
1340///
1341/// \headerfile <x86intrin.h>
1342///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001343/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001344///
1345/// \param __a
1346/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are
1347/// copied to the upper 96 bits of the result.
1348/// \param __b
1349/// A 128-bit vector of [2 x double]. The lower double-precision
1350/// floating-point element is used in the conversion.
1351/// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
1352/// converted value from the second parameter. The upper 96 bits are copied
1353/// from the upper 96 bits of the first parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00001354static __inline__ __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001355_mm_cvtsd_ss(__m128 __a, __m128d __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001356{
 /* __a is passed to the builtin as __v4sf and __b as __v2df; the builtin's
    result is cast back to the declared return type __m128. */
Simon Pilgrime3b9ee02016-07-20 10:18:01 +00001357 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001358}
1359
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001360/// \brief Converts a 32-bit signed integer value, in the second parameter, into
1361/// a double-precision floating-point value, returned in the lower 64 bits of
1362/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1363/// are copied from the upper 64 bits of the first parameter.
1364///
1365/// \headerfile <x86intrin.h>
1366///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001367/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001368///
1369/// \param __a
1370/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1371/// copied to the upper 64 bits of the result.
1372/// \param __b
1373/// A 32-bit signed integer containing the value to be converted.
1374/// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1375/// converted value from the second parameter. The upper 64 bits are copied
1376/// from the upper 64 bits of the first parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00001377static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001378_mm_cvtsi32_sd(__m128d __a, int __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001379{
 /* __a is a by-value parameter, so this write only changes the local copy
    that is returned. Element 0 receives __b (converted to double by the
    assignment); element 1 is not touched. */
David Blaikie3302f2b2013-01-16 23:08:36 +00001380 __a[0] = __b;
1381 return __a;
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001382}
1383
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001384/// \brief Converts the lower single-precision floating-point element of a
1385/// 128-bit vector of [4 x float], in the second parameter, into a
1386/// double-precision floating-point value, returned in the lower 64 bits of
1387/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1388/// are copied from the upper 64 bits of the first parameter.
1389///
1390/// \headerfile <x86intrin.h>
1391///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001392/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001393///
1394/// \param __a
1395/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1396/// copied to the upper 64 bits of the result.
1397/// \param __b
1398/// A 128-bit vector of [4 x float]. The lower single-precision
1399/// floating-point element is used in the conversion.
1400/// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1401/// converted value from the second parameter. The upper 64 bits are copied
1402/// from the upper 64 bits of the first parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00001403static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001404_mm_cvtss_sd(__m128d __a, __m128 __b)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001405{
 /* __a is a by-value parameter, so this write only changes the local copy
    that is returned. Element 0 of __b is converted to double by the
    assignment into __a[0]; element 1 of __a is not touched. */
David Blaikie3302f2b2013-01-16 23:08:36 +00001406 __a[0] = __b[0];
1407 return __a;
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001408}
1409
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001410/// \brief Converts the two double-precision floating-point elements of a
1411/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
1412/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. If the
1413/// result of either conversion is inexact, the result is truncated (rounded
1414/// towards zero) regardless of the current MXCSR setting. The upper 64 bits
1415/// of the result vector are set to zero.
1416///
1417/// \headerfile <x86intrin.h>
1418///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00001419/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>
1420/// instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001421///
1422/// \param __a
1423/// A 128-bit vector of [2 x double].
1424/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1425/// converted values. The upper 64 bits are set to zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00001426static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001427_mm_cvttpd_epi32(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001428{
 /* __a is passed to the builtin as __v2df; the builtin's result is
    explicitly cast to the declared return type __m128i. */
Craig Topper1aa231e2016-05-16 06:38:42 +00001429 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001430}
1431
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001432/// \brief Converts the low-order element of a [2 x double] vector into a 32-bit
1433/// signed integer value, truncating the result when it is inexact.
1434///
1435/// \headerfile <x86intrin.h>
1436///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00001437/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
1438/// instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001439///
1440/// \param __a
1441/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1442/// conversion.
1443/// \returns A 32-bit signed integer containing the converted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001444static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001445_mm_cvttsd_si32(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001446{
 /* Only the cast to __v2df happens here; the truncating conversion to int is
    done entirely by the builtin, whose value is returned as-is. */
Simon Pilgrime3b9ee02016-07-20 10:18:01 +00001447 return __builtin_ia32_cvttsd2si((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001448}
1449
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001450/// \brief Converts the two double-precision floating-point elements of a
1451/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
1452/// returned in a 64-bit vector of [2 x i32].
1453///
1454/// \headerfile <x86intrin.h>
1455///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001456/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001457///
1458/// \param __a
1459/// A 128-bit vector of [2 x double].
1460/// \returns A 64-bit vector of [2 x i32] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001461static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001462_mm_cvtpd_pi32(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001463{
 /* __a is passed to the builtin as __v2df; the builtin's result is cast to
    the declared return type __m64. */
Craig Topper1aa231e2016-05-16 06:38:42 +00001464 return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001465}
1466
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001467/// \brief Converts the two double-precision floating-point elements of a
1468/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
1469/// returned in a 64-bit vector of [2 x i32]. If the result of either
1470/// conversion is inexact, the result is truncated (rounded towards zero)
1471/// regardless of the current MXCSR setting.
1472///
1473/// \headerfile <x86intrin.h>
1474///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001475/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001476///
1477/// \param __a
1478/// A 128-bit vector of [2 x double].
1479/// \returns A 64-bit vector of [2 x i32] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001480static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001481_mm_cvttpd_pi32(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001482{
 /* __a is passed to the truncating builtin as __v2df; the builtin's result
    is cast to the declared return type __m64. */
Craig Topper1aa231e2016-05-16 06:38:42 +00001483 return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001484}
1485
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001486/// \brief Converts the two signed 32-bit integer elements of a 64-bit vector of
1487/// [2 x i32] into two double-precision floating-point values, returned in a
1488/// 128-bit vector of [2 x double].
1489///
1490/// \headerfile <x86intrin.h>
1491///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001492/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001493///
1494/// \param __a
1495/// A 64-bit vector of [2 x i32].
1496/// \returns A 128-bit vector of [2 x double] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001497static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001498_mm_cvtpi32_pd(__m64 __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001499{
David Blaikie3302f2b2013-01-16 23:08:36 +00001500 return __builtin_ia32_cvtpi2pd((__v2si)__a);
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001501}
1502
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001503/// \brief Returns the low-order element of a 128-bit vector of [2 x double] as
1504/// a double-precision floating-point value.
1505///
1506/// \headerfile <x86intrin.h>
1507///
1508/// This intrinsic has no corresponding instruction.
1509///
1510/// \param __a
1511/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.
1512/// \returns A double-precision floating-point value copied from the lower 64
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00001513/// bits of \a __a.
Michael Kupersteine45af542015-06-30 13:36:19 +00001514static __inline__ double __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001515_mm_cvtsd_f64(__m128d __a)
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001516{
David Blaikie3302f2b2013-01-16 23:08:36 +00001517 return __a[0];
Anders Carlssonf15e71d2008-12-24 01:45:22 +00001518}
1519
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001520/// \brief Loads a 128-bit floating-point vector of [2 x double] from an aligned
1521/// memory location.
1522///
1523/// \headerfile <x86intrin.h>
1524///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001525/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001526///
1527/// \param __dp
1528/// A pointer to a 128-bit memory location. The address of the memory
1529/// location has to be 16-byte aligned.
1530/// \returns A 128-bit vector of [2 x double] containing the loaded values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001531static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001532_mm_load_pd(double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001533{
David Blaikie3302f2b2013-01-16 23:08:36 +00001534 return *(__m128d*)__dp;
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001535}
1536
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001537/// \brief Loads a double-precision floating-point value from a specified memory
1538/// location and duplicates it to both vector elements of a 128-bit vector of
1539/// [2 x double].
1540///
1541/// \headerfile <x86intrin.h>
1542///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001543/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001544///
1545/// \param __dp
1546/// A pointer to a memory location containing a double-precision value.
1547/// \returns A 128-bit vector of [2 x double] containing the loaded and
1548/// duplicated values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001549static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001550_mm_load1_pd(double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001551{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001552 struct __mm_load1_pd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001553 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001554 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001555 double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
1556 return (__m128d){ __u, __u };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001557}
1558
/* Alternate spelling of _mm_load1_pd; expands to a call to that function. */
#define _mm_load_pd1(__dp) _mm_load1_pd(__dp)
1560
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001561/// \brief Loads two double-precision values, in reverse order, from an aligned
1562/// memory location into a 128-bit vector of [2 x double].
1563///
1564/// \headerfile <x86intrin.h>
1565///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00001566/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
1567/// needed shuffling instructions. In AVX mode, the shuffling may be combined
1568/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001569///
1570/// \param __dp
1571/// A 16-byte aligned pointer to an array of double-precision values to be
1572/// loaded in reverse order.
1573/// \returns A 128-bit vector of [2 x double] containing the reversed loaded
1574/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001575static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001576_mm_loadr_pd(double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001577{
David Blaikie3302f2b2013-01-16 23:08:36 +00001578 __m128d __u = *(__m128d*)__dp;
Craig Topper1aa231e2016-05-16 06:38:42 +00001579 return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001580}
1581
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001582/// \brief Loads a 128-bit floating-point vector of [2 x double] from an
1583/// unaligned memory location.
1584///
1585/// \headerfile <x86intrin.h>
1586///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001587/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001588///
1589/// \param __dp
1590/// A pointer to a 128-bit memory location. The address of the memory
1591/// location does not have to be aligned.
1592/// \returns A 128-bit vector of [2 x double] containing the loaded values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001593static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001594_mm_loadu_pd(double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001595{
Bill Wendling502931f2011-05-13 00:11:39 +00001596 struct __loadu_pd {
David Blaikie3302f2b2013-01-16 23:08:36 +00001597 __m128d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00001598 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001599 return ((struct __loadu_pd*)__dp)->__v;
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001600}
1601
Ekaterina Romanova2e041c92017-01-13 01:14:08 +00001602/// \brief Loads a 64-bit integer value to the low element of a 128-bit integer
1603/// vector and clears the upper element.
1604///
1605/// \headerfile <x86intrin.h>
1606///
1607/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
1608///
1609/// \param __dp
1610/// A pointer to a 64-bit memory location. The address of the memory
1611/// location does not have to be aligned.
1612/// \returns A 128-bit vector of [2 x i64] containing the loaded value.
Asaf Badouh57819aa2016-06-26 13:51:54 +00001613static __inline__ __m128i __DEFAULT_FN_ATTRS
1614_mm_loadu_si64(void const *__a)
1615{
1616 struct __loadu_si64 {
1617 long long __v;
1618 } __attribute__((__packed__, __may_alias__));
1619 long long __u = ((struct __loadu_si64*)__a)->__v;
1620 return (__m128i){__u, 0L};
1621}
1622
Ekaterina Romanova2e041c92017-01-13 01:14:08 +00001623/// \brief Loads a 64-bit double-precision value to the low element of a
1624/// 128-bit integer vector and clears the upper element.
1625///
1626/// \headerfile <x86intrin.h>
1627///
1628/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
1629///
1630/// \param __dp
1631/// An pointer to a memory location containing a double-precision value.
1632/// The address of the memory location does not have to be aligned.
1633/// \returns A 128-bit vector of [2 x double] containing the loaded value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001634static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001635_mm_load_sd(double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001636{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001637 struct __mm_load_sd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001638 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001639 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001640 double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
1641 return (__m128d){ __u, 0 };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001642}
1643
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001644/// \brief Loads a double-precision value into the high-order bits of a 128-bit
1645/// vector of [2 x double]. The low-order bits are copied from the low-order
1646/// bits of the first operand.
1647///
1648/// \headerfile <x86intrin.h>
1649///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001650/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001651///
1652/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00001653/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001654/// Bits [63:0] are written to bits [63:0] of the result.
1655/// \param __dp
1656/// A pointer to a 64-bit memory location containing a double-precision
1657/// floating-point value that is loaded. The loaded value is written to bits
1658/// [127:64] of the result. The address of the memory location does not have
1659/// to be aligned.
1660/// \returns A 128-bit vector of [2 x double] containing the moved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001661static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001662_mm_loadh_pd(__m128d __a, double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001663{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001664 struct __mm_loadh_pd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001665 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001666 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001667 double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
1668 return (__m128d){ __a[0], __u };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001669}
1670
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001671/// \brief Loads a double-precision value into the low-order bits of a 128-bit
1672/// vector of [2 x double]. The high-order bits are copied from the
1673/// high-order bits of the first operand.
1674///
1675/// \headerfile <x86intrin.h>
1676///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001677/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001678///
1679/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00001680/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001681/// Bits [127:64] are written to bits [127:64] of the result.
1682/// \param __dp
1683/// A pointer to a 64-bit memory location containing a double-precision
1684/// floating-point value that is loaded. The loaded value is written to bits
1685/// [63:0] of the result. The address of the memory location does not have to
1686/// be aligned.
1687/// \returns A 128-bit vector of [2 x double] containing the moved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001688static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001689_mm_loadl_pd(__m128d __a, double const *__dp)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001690{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001691 struct __mm_loadl_pd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001692 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001693 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001694 double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
1695 return (__m128d){ __u, __a[1] };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001696}
1697
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001698/// \brief Constructs a 128-bit floating-point vector of [2 x double] with
1699/// unspecified content. This could be used as an argument to another
1700/// intrinsic function where the argument is required but the value is not
1701/// actually used.
1702///
1703/// \headerfile <x86intrin.h>
1704///
1705/// This intrinsic has no corresponding instruction.
1706///
1707/// \returns A 128-bit floating-point vector of [2 x double] with unspecified
1708/// content.
Michael Kupersteine45af542015-06-30 13:36:19 +00001709static __inline__ __m128d __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +00001710_mm_undefined_pd(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +00001711{
1712 return (__m128d)__builtin_ia32_undef128();
1713}
1714
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001715/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower
1716/// 64 bits of the vector are initialized with the specified double-precision
1717/// floating-point value. The upper 64 bits are set to zero.
1718///
1719/// \headerfile <x86intrin.h>
1720///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001721/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001722///
1723/// \param __w
1724/// A double-precision floating-point value used to initialize the lower 64
1725/// bits of the result.
1726/// \returns An initialized 128-bit floating-point vector of [2 x double]. The
1727/// lower 64 bits contain the value of the parameter. The upper 64 bits are
1728/// set to zero.
Simon Pilgrim5aba9922015-08-26 21:17:12 +00001729static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001730_mm_set_sd(double __w)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001731{
David Blaikie3302f2b2013-01-16 23:08:36 +00001732 return (__m128d){ __w, 0 };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001733}
1734
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001735/// \brief Constructs a 128-bit floating-point vector of [2 x double], with each
1736/// of the two double-precision floating-point vector elements set to the
1737/// specified double-precision floating-point value.
1738///
1739/// \headerfile <x86intrin.h>
1740///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001741/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001742///
1743/// \param __w
1744/// A double-precision floating-point value used to initialize each vector
1745/// element of the result.
1746/// \returns An initialized 128-bit floating-point vector of [2 x double].
Michael Kupersteine45af542015-06-30 13:36:19 +00001747static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001748_mm_set1_pd(double __w)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001749{
David Blaikie3302f2b2013-01-16 23:08:36 +00001750 return (__m128d){ __w, __w };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001751}
1752
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001753/// \brief Constructs a 128-bit floating-point vector of [2 x double]
1754/// initialized with the specified double-precision floating-point values.
1755///
1756/// \headerfile <x86intrin.h>
1757///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001758/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001759///
1760/// \param __w
1761/// A double-precision floating-point value used to initialize the upper 64
1762/// bits of the result.
1763/// \param __x
1764/// A double-precision floating-point value used to initialize the lower 64
1765/// bits of the result.
1766/// \returns An initialized 128-bit floating-point vector of [2 x double].
Michael Kupersteine45af542015-06-30 13:36:19 +00001767static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001768_mm_set_pd(double __w, double __x)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001769{
David Blaikie3302f2b2013-01-16 23:08:36 +00001770 return (__m128d){ __x, __w };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001771}
1772
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001773/// \brief Constructs a 128-bit floating-point vector of [2 x double],
1774/// initialized in reverse order with the specified double-precision
1775/// floating-point values.
1776///
1777/// \headerfile <x86intrin.h>
1778///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001779/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001780///
1781/// \param __w
1782/// A double-precision floating-point value used to initialize the lower 64
1783/// bits of the result.
1784/// \param __x
1785/// A double-precision floating-point value used to initialize the upper 64
1786/// bits of the result.
1787/// \returns An initialized 128-bit floating-point vector of [2 x double].
Michael Kupersteine45af542015-06-30 13:36:19 +00001788static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001789_mm_setr_pd(double __w, double __x)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001790{
David Blaikie3302f2b2013-01-16 23:08:36 +00001791 return (__m128d){ __w, __x };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001792}
1793
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001794/// \brief Constructs a 128-bit floating-point vector of [2 x double]
1795/// initialized to zero.
1796///
1797/// \headerfile <x86intrin.h>
1798///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001799/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001800///
1801/// \returns An initialized 128-bit floating-point vector of [2 x double] with
1802/// all elements set to zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00001803static __inline__ __m128d __DEFAULT_FN_ATTRS
Mike Stump5b31ed32009-02-13 14:24:50 +00001804_mm_setzero_pd(void)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001805{
1806 return (__m128d){ 0, 0 };
1807}
1808
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001809/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower
1810/// 64 bits are set to the lower 64 bits of the second parameter. The upper
1811/// 64 bits are set to the upper 64 bits of the first parameter.
1812//
1813/// \headerfile <x86intrin.h>
1814///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001815/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001816///
1817/// \param __a
1818/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the
1819/// upper 64 bits of the result.
1820/// \param __b
1821/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the
1822/// lower 64 bits of the result.
1823/// \returns A 128-bit vector of [2 x double] containing the moved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001824static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001825_mm_move_sd(__m128d __a, __m128d __b)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001826{
David Blaikie3302f2b2013-01-16 23:08:36 +00001827 return (__m128d){ __b[0], __a[1] };
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001828}
1829
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001830/// \brief Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
1831/// memory location.
1832///
1833/// \headerfile <x86intrin.h>
1834///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001835/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001836///
1837/// \param __dp
1838/// A pointer to a 64-bit memory location.
1839/// \param __a
1840/// A 128-bit vector of [2 x double] containing the value to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00001841static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001842_mm_store_sd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001843{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001844 struct __mm_store_sd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001845 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001846 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001847 ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001848}
1849
Michael Kupersteine45af542015-06-30 13:36:19 +00001850static __inline__ void __DEFAULT_FN_ATTRS
Simon Pilgrim645e1ad2016-05-30 17:55:25 +00001851_mm_store_pd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001852{
Simon Pilgrim645e1ad2016-05-30 17:55:25 +00001853 *(__m128d*)__dp = __a;
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001854}
1855
Michael Kupersteine45af542015-06-30 13:36:19 +00001856static __inline__ void __DEFAULT_FN_ATTRS
Simon Pilgrim645e1ad2016-05-30 17:55:25 +00001857_mm_store1_pd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001858{
Simon Pilgrim645e1ad2016-05-30 17:55:25 +00001859 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
1860 _mm_store_pd(__dp, __a);
1861}
1862
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001863/// \brief Stores a 128-bit vector of [2 x double] into an aligned memory
1864/// location.
1865///
1866/// \headerfile <x86intrin.h>
1867///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001868/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001869///
1870/// \param __dp
1871/// A pointer to a 128-bit memory location. The address of the memory
1872/// location has to be 16-byte aligned.
1873/// \param __a
1874/// A 128-bit vector of [2 x double] containing the values to be stored.
Simon Pilgrim645e1ad2016-05-30 17:55:25 +00001875static __inline__ void __DEFAULT_FN_ATTRS
1876_mm_store_pd1(double *__dp, __m128d __a)
1877{
1878 return _mm_store1_pd(__dp, __a);
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001879}
1880
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001881/// \brief Stores a 128-bit vector of [2 x double] into an unaligned memory
1882/// location.
1883///
1884/// \headerfile <x86intrin.h>
1885///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001886/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001887///
1888/// \param __dp
1889/// A pointer to a 128-bit memory location. The address of the memory
1890/// location does not have to be aligned.
1891/// \param __a
1892/// A 128-bit vector of [2 x double] containing the values to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00001893static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001894_mm_storeu_pd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001895{
Craig Topper09175da2016-05-30 17:10:30 +00001896 struct __storeu_pd {
1897 __m128d __v;
1898 } __attribute__((__packed__, __may_alias__));
1899 ((struct __storeu_pd*)__dp)->__v = __a;
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001900}
1901
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001902/// \brief Stores two double-precision values, in reverse order, from a 128-bit
1903/// vector of [2 x double] to a 16-byte aligned memory location.
1904///
1905/// \headerfile <x86intrin.h>
1906///
1907/// This intrinsic corresponds to a shuffling instruction followed by a
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001908/// <c> VMOVAPD / MOVAPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001909///
1910/// \param __dp
1911/// A pointer to a 16-byte aligned memory location that can store two
1912/// double-precision values.
1913/// \param __a
1914/// A 128-bit vector of [2 x double] containing the values to be reversed and
1915/// stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00001916static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001917_mm_storer_pd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001918{
Craig Topper1aa231e2016-05-16 06:38:42 +00001919 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
David Blaikie3302f2b2013-01-16 23:08:36 +00001920 *(__m128d *)__dp = __a;
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001921}
1922
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001923/// \brief Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
1924/// memory location.
1925///
1926/// \headerfile <x86intrin.h>
1927///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001928/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001929///
1930/// \param __dp
1931/// A pointer to a 64-bit memory location.
1932/// \param __a
1933/// A 128-bit vector of [2 x double] containing the value to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00001934static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001935_mm_storeh_pd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001936{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001937 struct __mm_storeh_pd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001938 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001939 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001940 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001941}
1942
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001943/// \brief Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
1944/// memory location.
1945///
1946/// \headerfile <x86intrin.h>
1947///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001948/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001949///
1950/// \param __dp
1951/// A pointer to a 64-bit memory location.
1952/// \param __a
1953/// A 128-bit vector of [2 x double] containing the value to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00001954static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001955_mm_storel_pd(double *__dp, __m128d __a)
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001956{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001957 struct __mm_storeh_pd_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00001958 double __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00001959 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001960 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
Anders Carlssonb08ac0b2008-12-24 02:11:54 +00001961}
1962
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001963/// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8],
1964/// saving the lower 8 bits of each sum in the corresponding element of a
1965/// 128-bit result vector of [16 x i8]. The integer elements of both
1966/// parameters can be either signed or unsigned.
1967///
1968/// \headerfile <x86intrin.h>
1969///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001970/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001971///
1972/// \param __a
1973/// A 128-bit vector of [16 x i8].
1974/// \param __b
1975/// A 128-bit vector of [16 x i8].
1976/// \returns A 128-bit vector of [16 x i8] containing the sums of both
1977/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001978static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001979_mm_add_epi8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00001980{
Craig Topper6a77b622016-06-04 05:43:41 +00001981 return (__m128i)((__v16qu)__a + (__v16qu)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00001982}
1983
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001984/// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16],
1985/// saving the lower 16 bits of each sum in the corresponding element of a
1986/// 128-bit result vector of [8 x i16]. The integer elements of both
1987/// parameters can be either signed or unsigned.
1988///
1989/// \headerfile <x86intrin.h>
1990///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001991/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00001992///
1993/// \param __a
1994/// A 128-bit vector of [8 x i16].
1995/// \param __b
1996/// A 128-bit vector of [8 x i16].
1997/// \returns A 128-bit vector of [8 x i16] containing the sums of both
1998/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001999static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002000_mm_add_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002001{
Craig Topper6a77b622016-06-04 05:43:41 +00002002 return (__m128i)((__v8hu)__a + (__v8hu)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002003}
2004
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002005/// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32],
2006/// saving the lower 32 bits of each sum in the corresponding element of a
2007/// 128-bit result vector of [4 x i32]. The integer elements of both
2008/// parameters can be either signed or unsigned.
2009///
2010/// \headerfile <x86intrin.h>
2011///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002012/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002013///
2014/// \param __a
2015/// A 128-bit vector of [4 x i32].
2016/// \param __b
2017/// A 128-bit vector of [4 x i32].
2018/// \returns A 128-bit vector of [4 x i32] containing the sums of both
2019/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002020static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002021_mm_add_epi32(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002022{
Craig Topper6a77b622016-06-04 05:43:41 +00002023 return (__m128i)((__v4su)__a + (__v4su)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002024}
2025
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002026/// \brief Adds two signed or unsigned 64-bit integer values, returning the
2027/// lower 64 bits of the sum.
2028///
2029/// \headerfile <x86intrin.h>
2030///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002031/// This intrinsic corresponds to the <c> PADDQ </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002032///
2033/// \param __a
2034/// A 64-bit integer.
2035/// \param __b
2036/// A 64-bit integer.
2037/// \returns A 64-bit integer containing the sum of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002038static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002039_mm_add_si64(__m64 __a, __m64 __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002040{
Craig Topper1aa231e2016-05-16 06:38:42 +00002041 return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002042}
2043
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002044/// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64],
2045/// saving the lower 64 bits of each sum in the corresponding element of a
2046/// 128-bit result vector of [2 x i64]. The integer elements of both
2047/// parameters can be either signed or unsigned.
2048///
2049/// \headerfile <x86intrin.h>
2050///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002051/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002052///
2053/// \param __a
2054/// A 128-bit vector of [2 x i64].
2055/// \param __b
2056/// A 128-bit vector of [2 x i64].
2057/// \returns A 128-bit vector of [2 x i64] containing the sums of both
2058/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002059static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002060_mm_add_epi64(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002061{
Craig Topper6a77b622016-06-04 05:43:41 +00002062 return (__m128i)((__v2du)__a + (__v2du)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002063}
2064
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002065/// \brief Adds, with saturation, the corresponding elements of two 128-bit
2066/// signed [16 x i8] vectors, saving each sum in the corresponding element of
2067/// a 128-bit result vector of [16 x i8]. Positive sums greater than 7Fh are
2068/// saturated to 7Fh. Negative sums less than 80h are saturated to 80h.
2069///
2070/// \headerfile <x86intrin.h>
2071///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002072/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002073///
2074/// \param __a
2075/// A 128-bit signed [16 x i8] vector.
2076/// \param __b
2077/// A 128-bit signed [16 x i8] vector.
2078/// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
2079/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002080static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002081_mm_adds_epi8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002082{
David Blaikie3302f2b2013-01-16 23:08:36 +00002083 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002084}
2085
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002086/// \brief Adds, with saturation, the corresponding elements of two 128-bit
2087/// signed [8 x i16] vectors, saving each sum in the corresponding element of
2088/// a 128-bit result vector of [8 x i16]. Positive sums greater than 7FFFh
2089/// are saturated to 7FFFh. Negative sums less than 8000h are saturated to
2090/// 8000h.
2091///
2092/// \headerfile <x86intrin.h>
2093///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002094/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002095///
2096/// \param __a
2097/// A 128-bit signed [8 x i16] vector.
2098/// \param __b
2099/// A 128-bit signed [8 x i16] vector.
2100/// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
2101/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002102static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002103_mm_adds_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002104{
David Blaikie3302f2b2013-01-16 23:08:36 +00002105 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002106}
2107
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002108/// \brief Adds, with saturation, the corresponding elements of two 128-bit
2109/// unsigned [16 x i8] vectors, saving each sum in the corresponding element
2110/// of a 128-bit result vector of [16 x i8]. Positive sums greater than FFh
2111/// are saturated to FFh. Negative sums are saturated to 00h.
2112///
2113/// \headerfile <x86intrin.h>
2114///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002115/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002116///
2117/// \param __a
2118/// A 128-bit unsigned [16 x i8] vector.
2119/// \param __b
2120/// A 128-bit unsigned [16 x i8] vector.
2121/// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
2122/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002123static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002124_mm_adds_epu8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002125{
David Blaikie3302f2b2013-01-16 23:08:36 +00002126 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002127}
2128
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002129/// \brief Adds, with saturation, the corresponding elements of two 128-bit
2130/// unsigned [8 x i16] vectors, saving each sum in the corresponding element
2131/// of a 128-bit result vector of [8 x i16]. Positive sums greater than FFFFh
2132/// are saturated to FFFFh. Negative sums are saturated to 0000h.
2133///
2134/// \headerfile <x86intrin.h>
2135///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002136/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002137///
2138/// \param __a
2139/// A 128-bit unsigned [8 x i16] vector.
2140/// \param __b
2141/// A 128-bit unsigned [8 x i16] vector.
2142/// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
2143/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002144static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002145_mm_adds_epu16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002146{
David Blaikie3302f2b2013-01-16 23:08:36 +00002147 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002148}
2149
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002150/// \brief Computes the rounded avarages of corresponding elements of two
2151/// 128-bit unsigned [16 x i8] vectors, saving each result in the
2152/// corresponding element of a 128-bit result vector of [16 x i8].
2153///
2154/// \headerfile <x86intrin.h>
2155///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002156/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002157///
2158/// \param __a
2159/// A 128-bit unsigned [16 x i8] vector.
2160/// \param __b
2161/// A 128-bit unsigned [16 x i8] vector.
2162/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
2163/// averages of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002164static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002165_mm_avg_epu8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002166{
David Blaikie3302f2b2013-01-16 23:08:36 +00002167 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002168}
2169
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002170/// \brief Computes the rounded avarages of corresponding elements of two
2171/// 128-bit unsigned [8 x i16] vectors, saving each result in the
2172/// corresponding element of a 128-bit result vector of [8 x i16].
2173///
2174/// \headerfile <x86intrin.h>
2175///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002176/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002177///
2178/// \param __a
2179/// A 128-bit unsigned [8 x i16] vector.
2180/// \param __b
2181/// A 128-bit unsigned [8 x i16] vector.
2182/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
2183/// averages of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002184static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002185_mm_avg_epu16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002186{
David Blaikie3302f2b2013-01-16 23:08:36 +00002187 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002188}
2189
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002190/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
2191/// vectors, producing eight intermediate 32-bit signed integer products, and
2192/// adds the consecutive pairs of 32-bit products to form a 128-bit signed
2193/// [4 x i32] vector. For example, bits [15:0] of both parameters are
2194/// multiplied producing a 32-bit product, bits [31:16] of both parameters
2195/// are multiplied producing a 32-bit product, and the sum of those two
2196/// products becomes bits [31:0] of the result.
2197///
2198/// \headerfile <x86intrin.h>
2199///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002200/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002201///
2202/// \param __a
2203/// A 128-bit signed [8 x i16] vector.
2204/// \param __b
2205/// A 128-bit signed [8 x i16] vector.
2206/// \returns A 128-bit signed [4 x i32] vector containing the sums of products
2207/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00002208static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002209_mm_madd_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002210{
David Blaikie3302f2b2013-01-16 23:08:36 +00002211 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002212}
2213
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002214/// \brief Compares corresponding elements of two 128-bit signed [8 x i16]
2215/// vectors, saving the greater value from each comparison in the
2216/// corresponding element of a 128-bit result vector of [8 x i16].
2217///
2218/// \headerfile <x86intrin.h>
2219///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002220/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002221///
2222/// \param __a
2223/// A 128-bit signed [8 x i16] vector.
2224/// \param __b
2225/// A 128-bit signed [8 x i16] vector.
2226/// \returns A 128-bit signed [8 x i16] vector containing the greater value of
2227/// each comparison.
Michael Kupersteine45af542015-06-30 13:36:19 +00002228static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002229_mm_max_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002230{
David Blaikie3302f2b2013-01-16 23:08:36 +00002231 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002232}
2233
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002234/// \brief Compares corresponding elements of two 128-bit unsigned [16 x i8]
2235/// vectors, saving the greater value from each comparison in the
2236/// corresponding element of a 128-bit result vector of [16 x i8].
2237///
2238/// \headerfile <x86intrin.h>
2239///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002240/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002241///
2242/// \param __a
2243/// A 128-bit unsigned [16 x i8] vector.
2244/// \param __b
2245/// A 128-bit unsigned [16 x i8] vector.
2246/// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
2247/// each comparison.
Michael Kupersteine45af542015-06-30 13:36:19 +00002248static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002249_mm_max_epu8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002250{
David Blaikie3302f2b2013-01-16 23:08:36 +00002251 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002252}
2253
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002254/// \brief Compares corresponding elements of two 128-bit signed [8 x i16]
2255/// vectors, saving the smaller value from each comparison in the
2256/// corresponding element of a 128-bit result vector of [8 x i16].
2257///
2258/// \headerfile <x86intrin.h>
2259///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002260/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002261///
2262/// \param __a
2263/// A 128-bit signed [8 x i16] vector.
2264/// \param __b
2265/// A 128-bit signed [8 x i16] vector.
2266/// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
2267/// each comparison.
Michael Kupersteine45af542015-06-30 13:36:19 +00002268static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002269_mm_min_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002270{
David Blaikie3302f2b2013-01-16 23:08:36 +00002271 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002272}
2273
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002274/// \brief Compares corresponding elements of two 128-bit unsigned [16 x i8]
2275/// vectors, saving the smaller value from each comparison in the
2276/// corresponding element of a 128-bit result vector of [16 x i8].
2277///
2278/// \headerfile <x86intrin.h>
2279///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002280/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002281///
2282/// \param __a
2283/// A 128-bit unsigned [16 x i8] vector.
2284/// \param __b
2285/// A 128-bit unsigned [16 x i8] vector.
2286/// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
2287/// each comparison.
Michael Kupersteine45af542015-06-30 13:36:19 +00002288static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002289_mm_min_epu8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002290{
David Blaikie3302f2b2013-01-16 23:08:36 +00002291 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002292}
2293
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002294/// \brief Multiplies the corresponding elements of two signed [8 x i16]
2295/// vectors, saving the upper 16 bits of each 32-bit product in the
2296/// corresponding element of a 128-bit signed [8 x i16] result vector.
2297///
2298/// \headerfile <x86intrin.h>
2299///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002300/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002301///
2302/// \param __a
2303/// A 128-bit signed [8 x i16] vector.
2304/// \param __b
2305/// A 128-bit signed [8 x i16] vector.
2306/// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
2307/// each of the eight 32-bit products.
Michael Kupersteine45af542015-06-30 13:36:19 +00002308static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002309_mm_mulhi_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002310{
David Blaikie3302f2b2013-01-16 23:08:36 +00002311 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002312}
2313
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002314/// \brief Multiplies the corresponding elements of two unsigned [8 x i16]
2315/// vectors, saving the upper 16 bits of each 32-bit product in the
2316/// corresponding element of a 128-bit unsigned [8 x i16] result vector.
2317///
2318/// \headerfile <x86intrin.h>
2319///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002320/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002321///
2322/// \param __a
2323/// A 128-bit unsigned [8 x i16] vector.
2324/// \param __b
2325/// A 128-bit unsigned [8 x i16] vector.
2326/// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
2327/// of each of the eight 32-bit products.
Michael Kupersteine45af542015-06-30 13:36:19 +00002328static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002329_mm_mulhi_epu16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002330{
David Blaikie3302f2b2013-01-16 23:08:36 +00002331 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002332}
2333
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002334/// \brief Multiplies the corresponding elements of two signed [8 x i16]
2335/// vectors, saving the lower 16 bits of each 32-bit product in the
2336/// corresponding element of a 128-bit signed [8 x i16] result vector.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002337///
2338/// \headerfile <x86intrin.h>
2339///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002340/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002341///
2342/// \param __a
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002343/// A 128-bit signed [8 x i16] vector.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002344/// \param __b
Ekaterina Romanova06477bf2016-10-23 07:30:50 +00002345/// A 128-bit signed [8 x i16] vector.
2346/// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
2347/// each of the eight 32-bit products.
Michael Kupersteine45af542015-06-30 13:36:19 +00002348static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002349_mm_mullo_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002350{
Craig Topper6a77b622016-06-04 05:43:41 +00002351 return (__m128i)((__v8hu)__a * (__v8hu)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002352}
2353
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002354/// \brief Multiplies 32-bit unsigned integer values contained in the lower bits
2355/// of the two 64-bit integer vectors and returns the 64-bit unsigned
2356/// product.
2357///
2358/// \headerfile <x86intrin.h>
2359///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002360/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002361///
2362/// \param __a
2363/// A 64-bit integer containing one of the source operands.
2364/// \param __b
2365/// A 64-bit integer containing one of the source operands.
2366/// \returns A 64-bit integer vector containing the product of both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002367static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002368_mm_mul_su32(__m64 __a, __m64 __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002369{
David Blaikie3302f2b2013-01-16 23:08:36 +00002370 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002371}
2372
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002373/// \brief Multiplies 32-bit unsigned integer values contained in the lower
2374/// bits of the corresponding elements of two [2 x i64] vectors, and returns
2375/// the 64-bit products in the corresponding elements of a [2 x i64] vector.
2376///
2377/// \headerfile <x86intrin.h>
2378///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002379/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002380///
2381/// \param __a
2382/// A [2 x i64] vector containing one of the source operands.
2383/// \param __b
2384/// A [2 x i64] vector containing one of the source operands.
2385/// \returns A [2 x i64] vector containing the product of both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002386static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002387_mm_mul_epu32(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002388{
David Blaikie3302f2b2013-01-16 23:08:36 +00002389 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002390}
2391
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002392/// \brief Computes the absolute differences of corresponding 8-bit integer
2393/// values in two 128-bit vectors. Sums the first 8 absolute differences, and
2394/// separately sums the second 8 absolute differences. Packss these two
2395/// unsigned 16-bit integer sums into the upper and lower elements of a
2396/// [2 x i64] vector.
2397///
2398/// \headerfile <x86intrin.h>
2399///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002400/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002401///
2402/// \param __a
2403/// A 128-bit integer vector containing one of the source operands.
2404/// \param __b
2405/// A 128-bit integer vector containing one of the source operands.
2406/// \returns A [2 x i64] vector containing the sums of the sets of absolute
2407/// differences between both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002408static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002409_mm_sad_epu8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002410{
David Blaikie3302f2b2013-01-16 23:08:36 +00002411 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002412}
2413
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002414/// \brief Subtracts the corresponding 8-bit integer values in the operands.
2415///
2416/// \headerfile <x86intrin.h>
2417///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002418/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002419///
2420/// \param __a
2421/// A 128-bit integer vector containing the minuends.
2422/// \param __b
2423/// A 128-bit integer vector containing the subtrahends.
2424/// \returns A 128-bit integer vector containing the differences of the values
2425/// in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002426static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002427_mm_sub_epi8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002428{
Craig Topper6a77b622016-06-04 05:43:41 +00002429 return (__m128i)((__v16qu)__a - (__v16qu)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002430}
2431
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002432/// \brief Subtracts the corresponding 16-bit integer values in the operands.
2433///
2434/// \headerfile <x86intrin.h>
2435///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002436/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002437///
2438/// \param __a
2439/// A 128-bit integer vector containing the minuends.
2440/// \param __b
2441/// A 128-bit integer vector containing the subtrahends.
2442/// \returns A 128-bit integer vector containing the differences of the values
2443/// in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002444static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002445_mm_sub_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002446{
Craig Topper6a77b622016-06-04 05:43:41 +00002447 return (__m128i)((__v8hu)__a - (__v8hu)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002448}
2449
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002450/// \brief Subtracts the corresponding 32-bit integer values in the operands.
2451///
2452/// \headerfile <x86intrin.h>
2453///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002454/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002455///
2456/// \param __a
2457/// A 128-bit integer vector containing the minuends.
2458/// \param __b
2459/// A 128-bit integer vector containing the subtrahends.
2460/// \returns A 128-bit integer vector containing the differences of the values
2461/// in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002462static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002463_mm_sub_epi32(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002464{
Craig Topper6a77b622016-06-04 05:43:41 +00002465 return (__m128i)((__v4su)__a - (__v4su)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002466}
2467
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002468/// \brief Subtracts signed or unsigned 64-bit integer values and writes the
2469/// difference to the corresponding bits in the destination.
2470///
2471/// \headerfile <x86intrin.h>
2472///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002473/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002474///
2475/// \param __a
2476/// A 64-bit integer vector containing the minuend.
2477/// \param __b
2478/// A 64-bit integer vector containing the subtrahend.
2479/// \returns A 64-bit integer vector containing the difference of the values in
2480/// the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002481static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002482_mm_sub_si64(__m64 __a, __m64 __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002483{
Craig Topper1aa231e2016-05-16 06:38:42 +00002484 return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002485}
2486
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002487/// \brief Subtracts the corresponding elements of two [2 x i64] vectors.
2488///
2489/// \headerfile <x86intrin.h>
2490///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002491/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002492///
2493/// \param __a
2494/// A 128-bit integer vector containing the minuends.
2495/// \param __b
2496/// A 128-bit integer vector containing the subtrahends.
2497/// \returns A 128-bit integer vector containing the differences of the values
2498/// in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002499static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002500_mm_sub_epi64(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002501{
Craig Topper6a77b622016-06-04 05:43:41 +00002502 return (__m128i)((__v2du)__a - (__v2du)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002503}
2504
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002505/// \brief Subtracts corresponding 8-bit signed integer values in the input and
2506/// returns the differences in the corresponding bytes in the destination.
2507/// Differences greater than 7Fh are saturated to 7Fh, and differences less
2508/// than 80h are saturated to 80h.
2509///
2510/// \headerfile <x86intrin.h>
2511///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002512/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002513///
2514/// \param __a
2515/// A 128-bit integer vector containing the minuends.
2516/// \param __b
2517/// A 128-bit integer vector containing the subtrahends.
2518/// \returns A 128-bit integer vector containing the differences of the values
2519/// in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002520static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002521_mm_subs_epi8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002522{
David Blaikie3302f2b2013-01-16 23:08:36 +00002523 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002524}
2525
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002526/// \brief Subtracts corresponding 16-bit signed integer values in the input and
2527/// returns the differences in the corresponding bytes in the destination.
2528/// Differences greater than 7FFFh are saturated to 7FFFh, and values less
2529/// than 8000h are saturated to 8000h.
2530///
2531/// \headerfile <x86intrin.h>
2532///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002533/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002534///
2535/// \param __a
2536/// A 128-bit integer vector containing the minuends.
2537/// \param __b
2538/// A 128-bit integer vector containing the subtrahends.
2539/// \returns A 128-bit integer vector containing the differences of the values
2540/// in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002541static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002542_mm_subs_epi16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002543{
David Blaikie3302f2b2013-01-16 23:08:36 +00002544 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002545}
2546
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002547/// \brief Subtracts corresponding 8-bit unsigned integer values in the input
2548/// and returns the differences in the corresponding bytes in the
2549/// destination. Differences less than 00h are saturated to 00h.
2550///
2551/// \headerfile <x86intrin.h>
2552///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002553/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002554///
2555/// \param __a
2556/// A 128-bit integer vector containing the minuends.
2557/// \param __b
2558/// A 128-bit integer vector containing the subtrahends.
2559/// \returns A 128-bit integer vector containing the unsigned integer
2560/// differences of the values in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002561static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002562_mm_subs_epu8(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002563{
David Blaikie3302f2b2013-01-16 23:08:36 +00002564 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002565}
2566
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002567/// \brief Subtracts corresponding 16-bit unsigned integer values in the input
2568/// and returns the differences in the corresponding bytes in the
2569/// destination. Differences less than 0000h are saturated to 0000h.
2570///
2571/// \headerfile <x86intrin.h>
2572///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002573/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002574///
2575/// \param __a
2576/// A 128-bit integer vector containing the minuends.
2577/// \param __b
2578/// A 128-bit integer vector containing the subtrahends.
2579/// \returns A 128-bit integer vector containing the unsigned integer
2580/// differences of the values in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002581static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002582_mm_subs_epu16(__m128i __a, __m128i __b)
Anders Carlssona283f912008-12-24 02:41:00 +00002583{
David Blaikie3302f2b2013-01-16 23:08:36 +00002584 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
Anders Carlssona283f912008-12-24 02:41:00 +00002585}
2586
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002587/// \brief Performs a bitwise AND of two 128-bit integer vectors.
2588///
2589/// \headerfile <x86intrin.h>
2590///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002591/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002592///
2593/// \param __a
2594/// A 128-bit integer vector containing one of the source operands.
2595/// \param __b
2596/// A 128-bit integer vector containing one of the source operands.
2597/// \returns A 128-bit integer vector containing the bitwise AND of the values
2598/// in both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002599static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002600_mm_and_si128(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002601{
Craig Topper6a77b622016-06-04 05:43:41 +00002602 return (__m128i)((__v2du)__a & (__v2du)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002603}
2604
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002605/// \brief Performs a bitwise AND of two 128-bit integer vectors, using the
2606/// one's complement of the values contained in the first source operand.
2607///
2608/// \headerfile <x86intrin.h>
2609///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002610/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002611///
2612/// \param __a
2613/// A 128-bit vector containing the left source operand. The one's complement
2614/// of this value is used in the bitwise AND.
2615/// \param __b
2616/// A 128-bit vector containing the right source operand.
2617/// \returns A 128-bit integer vector containing the bitwise AND of the one's
2618/// complement of the first operand and the values in the second operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00002619static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002620_mm_andnot_si128(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002621{
Craig Topper6a77b622016-06-04 05:43:41 +00002622 return (__m128i)(~(__v2du)__a & (__v2du)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002623}
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002624/// \brief Performs a bitwise OR of two 128-bit integer vectors.
2625///
2626/// \headerfile <x86intrin.h>
2627///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002628/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002629///
2630/// \param __a
2631/// A 128-bit integer vector containing one of the source operands.
2632/// \param __b
2633/// A 128-bit integer vector containing one of the source operands.
2634/// \returns A 128-bit integer vector containing the bitwise OR of the values
2635/// in both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002636static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002637_mm_or_si128(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002638{
Craig Topper6a77b622016-06-04 05:43:41 +00002639 return (__m128i)((__v2du)__a | (__v2du)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002640}
2641
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002642/// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors.
2643///
2644/// \headerfile <x86intrin.h>
2645///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002646/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002647///
2648/// \param __a
2649/// A 128-bit integer vector containing one of the source operands.
2650/// \param __b
2651/// A 128-bit integer vector containing one of the source operands.
2652/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
2653/// values in both operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00002654static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002655_mm_xor_si128(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002656{
Craig Topper6a77b622016-06-04 05:43:41 +00002657 return (__m128i)((__v2du)__a ^ (__v2du)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002658}
2659
/// \brief Shifts the whole 128-bit integer vector operand left by the
///    specified number of bytes. Low-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_slli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
///
/// The shift is expressed as a byte shuffle over the concatenation of the
/// vector returned by _mm_setzero_si128() (indices 0-15) and \a a (indices
/// 16-31). When any of bits [7:4] of \a imm, converted to char, is set, every
/// index selects from the first (zero) vector.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v16qi)_mm_setzero_si128(), \
      (__v16qi)(__m128i)(a), \
      (((char)(imm) & 0xF0) == 0) ? (16 - (char)(imm)) : 0, \
      (((char)(imm) & 0xF0) == 0) ? (17 - (char)(imm)) : 1, \
      (((char)(imm) & 0xF0) == 0) ? (18 - (char)(imm)) : 2, \
      (((char)(imm) & 0xF0) == 0) ? (19 - (char)(imm)) : 3, \
      (((char)(imm) & 0xF0) == 0) ? (20 - (char)(imm)) : 4, \
      (((char)(imm) & 0xF0) == 0) ? (21 - (char)(imm)) : 5, \
      (((char)(imm) & 0xF0) == 0) ? (22 - (char)(imm)) : 6, \
      (((char)(imm) & 0xF0) == 0) ? (23 - (char)(imm)) : 7, \
      (((char)(imm) & 0xF0) == 0) ? (24 - (char)(imm)) : 8, \
      (((char)(imm) & 0xF0) == 0) ? (25 - (char)(imm)) : 9, \
      (((char)(imm) & 0xF0) == 0) ? (26 - (char)(imm)) : 10, \
      (((char)(imm) & 0xF0) == 0) ? (27 - (char)(imm)) : 11, \
      (((char)(imm) & 0xF0) == 0) ? (28 - (char)(imm)) : 12, \
      (((char)(imm) & 0xF0) == 0) ? (29 - (char)(imm)) : 13, \
      (((char)(imm) & 0xF0) == 0) ? (30 - (char)(imm)) : 14, \
      (((char)(imm) & 0xF0) == 0) ? (31 - (char)(imm)) : 15); })
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002697
/* Alternate spelling of _mm_slli_si128; forwards both arguments unchanged. */
#define _mm_bslli_si128(a, imm) _mm_slli_si128((a), (imm))
2700
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002701/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
2702/// by the specified number of bits. Low-order bits are cleared.
2703///
2704/// \headerfile <x86intrin.h>
2705///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002706/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002707///
2708/// \param __a
2709/// A 128-bit integer vector containing the source operand.
2710/// \param __count
2711/// An integer value specifying the number of bits to left-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002712/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002713/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002714static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002715_mm_slli_epi16(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002716{
David Blaikie3302f2b2013-01-16 23:08:36 +00002717 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002718}
2719
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002720/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
2721/// by the specified number of bits. Low-order bits are cleared.
2722///
2723/// \headerfile <x86intrin.h>
2724///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002725/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002726///
2727/// \param __a
2728/// A 128-bit integer vector containing the source operand.
2729/// \param __count
2730/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002731/// to left-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002732/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002733static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002734_mm_sll_epi16(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002735{
David Blaikie3302f2b2013-01-16 23:08:36 +00002736 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002737}
2738
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002739/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
2740/// by the specified number of bits. Low-order bits are cleared.
2741///
2742/// \headerfile <x86intrin.h>
2743///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002744/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002745///
2746/// \param __a
2747/// A 128-bit integer vector containing the source operand.
2748/// \param __count
2749/// An integer value specifying the number of bits to left-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002750/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002751/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002752static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002753_mm_slli_epi32(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002754{
David Blaikie3302f2b2013-01-16 23:08:36 +00002755 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002756}
2757
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002758/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
2759/// by the specified number of bits. Low-order bits are cleared.
2760///
2761/// \headerfile <x86intrin.h>
2762///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002763/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002764///
2765/// \param __a
2766/// A 128-bit integer vector containing the source operand.
2767/// \param __count
2768/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002769/// to left-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002770/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002771static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002772_mm_sll_epi32(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002773{
David Blaikie3302f2b2013-01-16 23:08:36 +00002774 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002775}
2776
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002777/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
2778/// by the specified number of bits. Low-order bits are cleared.
2779///
2780/// \headerfile <x86intrin.h>
2781///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002782/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002783///
2784/// \param __a
2785/// A 128-bit integer vector containing the source operand.
2786/// \param __count
2787/// An integer value specifying the number of bits to left-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002788/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002789/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002790static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002791_mm_slli_epi64(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002792{
Craig Topper1aa231e2016-05-16 06:38:42 +00002793 return __builtin_ia32_psllqi128((__v2di)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002794}
2795
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002796/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
2797/// by the specified number of bits. Low-order bits are cleared.
2798///
2799/// \headerfile <x86intrin.h>
2800///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002801/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002802///
2803/// \param __a
2804/// A 128-bit integer vector containing the source operand.
2805/// \param __count
2806/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002807/// to left-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002808/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002809static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002810_mm_sll_epi64(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002811{
Craig Topper1aa231e2016-05-16 06:38:42 +00002812 return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002813}
2814
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002815/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
2816/// by the specified number of bits. High-order bits are filled with the sign
2817/// bit of the initial value.
2818///
2819/// \headerfile <x86intrin.h>
2820///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002821/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002822///
2823/// \param __a
2824/// A 128-bit integer vector containing the source operand.
2825/// \param __count
2826/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002827/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002828/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002829static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002830_mm_srai_epi16(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002831{
David Blaikie3302f2b2013-01-16 23:08:36 +00002832 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002833}
2834
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002835/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
2836/// by the specified number of bits. High-order bits are filled with the sign
2837/// bit of the initial value.
2838///
2839/// \headerfile <x86intrin.h>
2840///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002841/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002842///
2843/// \param __a
2844/// A 128-bit integer vector containing the source operand.
2845/// \param __count
2846/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002847/// to right-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002848/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002849static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002850_mm_sra_epi16(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002851{
David Blaikie3302f2b2013-01-16 23:08:36 +00002852 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002853}
2854
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002855/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
2856/// by the specified number of bits. High-order bits are filled with the sign
2857/// bit of the initial value.
2858///
2859/// \headerfile <x86intrin.h>
2860///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002861/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002862///
2863/// \param __a
2864/// A 128-bit integer vector containing the source operand.
2865/// \param __count
2866/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002867/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002868/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002869static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002870_mm_srai_epi32(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002871{
David Blaikie3302f2b2013-01-16 23:08:36 +00002872 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002873}
2874
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002875/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
2876/// by the specified number of bits. High-order bits are filled with the sign
2877/// bit of the initial value.
2878///
2879/// \headerfile <x86intrin.h>
2880///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002881/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002882///
2883/// \param __a
2884/// A 128-bit integer vector containing the source operand.
2885/// \param __count
2886/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002887/// to right-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002888/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002889static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002890_mm_sra_epi32(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002891{
David Blaikie3302f2b2013-01-16 23:08:36 +00002892 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002893}
2894
/// \brief Shifts the whole 128-bit integer vector operand right by the
///    specified number of bytes. High-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_srli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.
///
/// The shift is expressed as a byte shuffle over the concatenation of \a a
/// (indices 0-15) and the vector returned by _mm_setzero_si128() (indices
/// 16-31). When any of bits [7:4] of \a imm, converted to char, is set, every
/// index selects from the second (zero) vector.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to right-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v16qi)(__m128i)(a), \
      (__v16qi)_mm_setzero_si128(), \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 0) : 16, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 1) : 17, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 2) : 18, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 3) : 19, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 4) : 20, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 5) : 21, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 6) : 22, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 7) : 23, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 8) : 24, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 9) : 25, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 10) : 26, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 11) : 27, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 12) : 28, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 13) : 29, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 14) : 30, \
      (((char)(imm) & 0xF0) == 0) ? ((char)(imm) + 15) : 31); })
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002932
/* Alternate spelling of _mm_srli_si128; forwards both arguments unchanged. */
#define _mm_bsrli_si128(a, imm) _mm_srli_si128((a), (imm))
2935
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002936/// \brief Right-shifts each of 16-bit values in the 128-bit integer vector
2937/// operand by the specified number of bits. High-order bits are cleared.
2938///
2939/// \headerfile <x86intrin.h>
2940///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002941/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002942///
2943/// \param __a
2944/// A 128-bit integer vector containing the source operand.
2945/// \param __count
2946/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002947/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002948/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002949static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002950_mm_srli_epi16(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002951{
David Blaikie3302f2b2013-01-16 23:08:36 +00002952 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002953}
2954
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002955/// \brief Right-shifts each of 16-bit values in the 128-bit integer vector
2956/// operand by the specified number of bits. High-order bits are cleared.
2957///
2958/// \headerfile <x86intrin.h>
2959///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002960/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002961///
2962/// \param __a
2963/// A 128-bit integer vector containing the source operand.
2964/// \param __count
2965/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002966/// to right-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002967/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002968static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002969_mm_srl_epi16(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002970{
David Blaikie3302f2b2013-01-16 23:08:36 +00002971 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002972}
2973
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002974/// \brief Right-shifts each of 32-bit values in the 128-bit integer vector
2975/// operand by the specified number of bits. High-order bits are cleared.
2976///
2977/// \headerfile <x86intrin.h>
2978///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002979/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002980///
2981/// \param __a
2982/// A 128-bit integer vector containing the source operand.
2983/// \param __count
2984/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00002985/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002986/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00002987static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00002988_mm_srli_epi32(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002989{
David Blaikie3302f2b2013-01-16 23:08:36 +00002990 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00002991}
2992
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002993/// \brief Right-shifts each of 32-bit values in the 128-bit integer vector
2994/// operand by the specified number of bits. High-order bits are cleared.
2995///
2996/// \headerfile <x86intrin.h>
2997///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00002998/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00002999///
3000/// \param __a
3001/// A 128-bit integer vector containing the source operand.
3002/// \param __count
3003/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00003004/// to right-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003005/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003006static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003007_mm_srl_epi32(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003008{
David Blaikie3302f2b2013-01-16 23:08:36 +00003009 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003010}
3011
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003012/// \brief Right-shifts each of 64-bit values in the 128-bit integer vector
3013/// operand by the specified number of bits. High-order bits are cleared.
3014///
3015/// \headerfile <x86intrin.h>
3016///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003017/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003018///
3019/// \param __a
3020/// A 128-bit integer vector containing the source operand.
3021/// \param __count
3022/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00003023/// in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003024/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003025static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003026_mm_srli_epi64(__m128i __a, int __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003027{
Craig Topper1aa231e2016-05-16 06:38:42 +00003028 return __builtin_ia32_psrlqi128((__v2di)__a, __count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003029}
3030
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003031/// \brief Right-shifts each of 64-bit values in the 128-bit integer vector
3032/// operand by the specified number of bits. High-order bits are cleared.
3033///
3034/// \headerfile <x86intrin.h>
3035///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003036/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003037///
3038/// \param __a
3039/// A 128-bit integer vector containing the source operand.
3040/// \param __count
3041/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00003042/// to right-shift each value in operand \a __a.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003043/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003044static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003045_mm_srl_epi64(__m128i __a, __m128i __count)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003046{
Craig Topper1aa231e2016-05-16 06:38:42 +00003047 return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003048}
3049
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003050/// \brief Compares each of the corresponding 8-bit values of the 128-bit
3051/// integer vectors for equality. Each comparison yields 0h for false, FFh
3052/// for true.
3053///
3054/// \headerfile <x86intrin.h>
3055///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003056/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003057///
3058/// \param __a
3059/// A 128-bit integer vector.
3060/// \param __b
3061/// A 128-bit integer vector.
3062/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003063static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003064_mm_cmpeq_epi8(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003065{
David Blaikie3302f2b2013-01-16 23:08:36 +00003066 return (__m128i)((__v16qi)__a == (__v16qi)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003067}
3068
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003069/// \brief Compares each of the corresponding 16-bit values of the 128-bit
3070/// integer vectors for equality. Each comparison yields 0h for false, FFFFh
3071/// for true.
3072///
3073/// \headerfile <x86intrin.h>
3074///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003075/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003076///
3077/// \param __a
3078/// A 128-bit integer vector.
3079/// \param __b
3080/// A 128-bit integer vector.
3081/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003082static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003083_mm_cmpeq_epi16(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003084{
David Blaikie3302f2b2013-01-16 23:08:36 +00003085 return (__m128i)((__v8hi)__a == (__v8hi)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003086}
3087
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003088/// \brief Compares each of the corresponding 32-bit values of the 128-bit
3089/// integer vectors for equality. Each comparison yields 0h for false,
3090/// FFFFFFFFh for true.
3091///
3092/// \headerfile <x86intrin.h>
3093///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003094/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003095///
3096/// \param __a
3097/// A 128-bit integer vector.
3098/// \param __b
3099/// A 128-bit integer vector.
3100/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003101static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003102_mm_cmpeq_epi32(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003103{
David Blaikie3302f2b2013-01-16 23:08:36 +00003104 return (__m128i)((__v4si)__a == (__v4si)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003105}
3106
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003107/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
3108/// integer vectors to determine if the values in the first operand are
3109/// greater than those in the second operand. Each comparison yields 0h for
3110/// false, FFh for true.
3111///
3112/// \headerfile <x86intrin.h>
3113///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003114/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003115///
3116/// \param __a
3117/// A 128-bit integer vector.
3118/// \param __b
3119/// A 128-bit integer vector.
3120/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003121static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003122_mm_cmpgt_epi8(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003123{
Nick Lewyckyd0ba3792012-02-04 02:16:48 +00003124 /* This function always performs a signed comparison, but __v16qi is a char
Chandler Carruthcbe64112015-10-01 23:40:12 +00003125 which may be signed or unsigned, so use __v16qs. */
David Blaikie3302f2b2013-01-16 23:08:36 +00003126 return (__m128i)((__v16qs)__a > (__v16qs)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003127}
3128
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003129/// \brief Compares each of the corresponding signed 16-bit values of the
3130/// 128-bit integer vectors to determine if the values in the first operand
3131/// are greater than those in the second operand. Each comparison yields 0h
3132/// for false, FFFFh for true.
3133///
3134/// \headerfile <x86intrin.h>
3135///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003136/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003137///
3138/// \param __a
3139/// A 128-bit integer vector.
3140/// \param __b
3141/// A 128-bit integer vector.
3142/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003143static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003144_mm_cmpgt_epi16(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003145{
David Blaikie3302f2b2013-01-16 23:08:36 +00003146 return (__m128i)((__v8hi)__a > (__v8hi)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003147}
3148
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003149/// \brief Compares each of the corresponding signed 32-bit values of the
3150/// 128-bit integer vectors to determine if the values in the first operand
3151/// are greater than those in the second operand. Each comparison yields 0h
3152/// for false, FFFFFFFFh for true.
3153///
3154/// \headerfile <x86intrin.h>
3155///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003156/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003157///
3158/// \param __a
3159/// A 128-bit integer vector.
3160/// \param __b
3161/// A 128-bit integer vector.
3162/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003163static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003164_mm_cmpgt_epi32(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003165{
David Blaikie3302f2b2013-01-16 23:08:36 +00003166 return (__m128i)((__v4si)__a > (__v4si)__b);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003167}
3168
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003169/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
3170/// integer vectors to determine if the values in the first operand are less
3171/// than those in the second operand. Each comparison yields 0h for false,
3172/// FFh for true.
3173///
3174/// \headerfile <x86intrin.h>
3175///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003176/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003177///
3178/// \param __a
3179/// A 128-bit integer vector.
3180/// \param __b
3181/// A 128-bit integer vector.
3182/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003183static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003184_mm_cmplt_epi8(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003185{
David Blaikie3302f2b2013-01-16 23:08:36 +00003186 return _mm_cmpgt_epi8(__b, __a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003187}
3188
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003189/// \brief Compares each of the corresponding signed 16-bit values of the
3190/// 128-bit integer vectors to determine if the values in the first operand
3191/// are less than those in the second operand. Each comparison yields 0h for
3192/// false, FFFFh for true.
3193///
3194/// \headerfile <x86intrin.h>
3195///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003196/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003197///
3198/// \param __a
3199/// A 128-bit integer vector.
3200/// \param __b
3201/// A 128-bit integer vector.
3202/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003203static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003204_mm_cmplt_epi16(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003205{
David Blaikie3302f2b2013-01-16 23:08:36 +00003206 return _mm_cmpgt_epi16(__b, __a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003207}
3208
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003209/// \brief Compares each of the corresponding signed 32-bit values of the
3210/// 128-bit integer vectors to determine if the values in the first operand
3211/// are less than those in the second operand. Each comparison yields 0h for
3212/// false, FFFFFFFFh for true.
3213///
3214/// \headerfile <x86intrin.h>
3215///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003216/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003217///
3218/// \param __a
3219/// A 128-bit integer vector.
3220/// \param __b
3221/// A 128-bit integer vector.
3222/// \returns A 128-bit integer vector containing the comparison results.
Michael Kupersteine45af542015-06-30 13:36:19 +00003223static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003224_mm_cmplt_epi32(__m128i __a, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003225{
David Blaikie3302f2b2013-01-16 23:08:36 +00003226 return _mm_cmpgt_epi32(__b, __a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003227}
3228
3229#ifdef __x86_64__
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003230/// \brief Converts a 64-bit signed integer value from the second operand into a
3231/// double-precision value and returns it in the lower element of a [2 x
3232/// double] vector; the upper element of the returned vector is copied from
3233/// the upper element of the first operand.
3234///
3235/// \headerfile <x86intrin.h>
3236///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003237/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003238///
3239/// \param __a
3240/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
3241/// copied to the upper 64 bits of the destination.
3242/// \param __b
3243/// A 64-bit signed integer operand containing the value to be converted.
3244/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
3245/// converted value of the second operand. The upper 64 bits are copied from
3246/// the upper 64 bits of the first operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00003247static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003248_mm_cvtsi64_sd(__m128d __a, long long __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003249{
David Blaikie3302f2b2013-01-16 23:08:36 +00003250 __a[0] = __b;
3251 return __a;
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003252}
3253
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003254/// \brief Converts the first (lower) element of a vector of [2 x double] into a
3255/// 64-bit signed integer value, according to the current rounding mode.
3256///
3257/// \headerfile <x86intrin.h>
3258///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003259/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003260///
3261/// \param __a
3262/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3263/// conversion.
3264/// \returns A 64-bit signed integer containing the converted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00003265static __inline__ long long __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003266_mm_cvtsd_si64(__m128d __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003267{
Craig Topper1aa231e2016-05-16 06:38:42 +00003268 return __builtin_ia32_cvtsd2si64((__v2df)__a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003269}
3270
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003271/// \brief Converts the first (lower) element of a vector of [2 x double] into a
3272/// 64-bit signed integer value, truncating the result when it is inexact.
3273///
3274/// \headerfile <x86intrin.h>
3275///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00003276/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
3277/// instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003278///
3279/// \param __a
3280/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3281/// conversion.
3282/// \returns A 64-bit signed integer containing the converted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00003283static __inline__ long long __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003284_mm_cvttsd_si64(__m128d __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003285{
Simon Pilgrime3b9ee02016-07-20 10:18:01 +00003286 return __builtin_ia32_cvttsd2si64((__v2df)__a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003287}
3288#endif
3289
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003290/// \brief Converts a vector of [4 x i32] into a vector of [4 x float].
3291///
3292/// \headerfile <x86intrin.h>
3293///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003294/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003295///
3296/// \param __a
3297/// A 128-bit integer vector.
3298/// \returns A 128-bit vector of [4 x float] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003299static __inline__ __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003300_mm_cvtepi32_ps(__m128i __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003301{
David Blaikie3302f2b2013-01-16 23:08:36 +00003302 return __builtin_ia32_cvtdq2ps((__v4si)__a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003303}
3304
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003305/// \brief Converts a vector of [4 x float] into a vector of [4 x i32].
3306///
3307/// \headerfile <x86intrin.h>
3308///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003309/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003310///
3311/// \param __a
3312/// A 128-bit vector of [4 x float].
3313/// \returns A 128-bit integer vector of [4 x i32] containing the converted
3314/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003315static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003316_mm_cvtps_epi32(__m128 __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003317{
Craig Topper1aa231e2016-05-16 06:38:42 +00003318 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003319}
3320
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003321/// \brief Converts a vector of [4 x float] into a vector of [4 x i32],
3322/// truncating the result when it is inexact.
3323///
3324/// \headerfile <x86intrin.h>
3325///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00003326/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>
3327/// instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003328///
3329/// \param __a
3330/// A 128-bit vector of [4 x float].
3331/// \returns A 128-bit vector of [4 x i32] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003332static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003333_mm_cvttps_epi32(__m128 __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003334{
Simon Pilgrime3b9ee02016-07-20 10:18:01 +00003335 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003336}
3337
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003338/// \brief Returns a vector of [4 x i32] where the lowest element is the input
3339/// operand and the remaining elements are zero.
3340///
3341/// \headerfile <x86intrin.h>
3342///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003343/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003344///
3345/// \param __a
3346/// A 32-bit signed integer operand.
3347/// \returns A 128-bit vector of [4 x i32].
Michael Kupersteine45af542015-06-30 13:36:19 +00003348static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003349_mm_cvtsi32_si128(int __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003350{
David Blaikie3302f2b2013-01-16 23:08:36 +00003351 return (__m128i)(__v4si){ __a, 0, 0, 0 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003352}
3353
3354#ifdef __x86_64__
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003355/// \brief Returns a vector of [2 x i64] where the lower element is the input
3356/// operand and the upper element is zero.
3357///
3358/// \headerfile <x86intrin.h>
3359///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003360/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003361///
3362/// \param __a
3363/// A 64-bit signed integer operand containing the value to be converted.
3364/// \returns A 128-bit vector of [2 x i64] containing the converted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00003365static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003366_mm_cvtsi64_si128(long long __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003367{
David Blaikie3302f2b2013-01-16 23:08:36 +00003368 return (__m128i){ __a, 0 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003369}
3370#endif
3371
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003372/// \brief Moves the least significant 32 bits of a vector of [4 x i32] to a
3373/// 32-bit signed integer value.
3374///
3375/// \headerfile <x86intrin.h>
3376///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003377/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003378///
3379/// \param __a
3380/// A vector of [4 x i32]. The least significant 32 bits are moved to the
3381/// destination.
3382/// \returns A 32-bit signed integer containing the moved value.
Michael Kupersteine45af542015-06-30 13:36:19 +00003383static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003384_mm_cvtsi128_si32(__m128i __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003385{
David Blaikie3302f2b2013-01-16 23:08:36 +00003386 __v4si __b = (__v4si)__a;
3387 return __b[0];
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003388}
3389
3390#ifdef __x86_64__
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003391/// \brief Moves the least significant 64 bits of a vector of [2 x i64] to a
3392/// 64-bit signed integer value.
3393///
3394/// \headerfile <x86intrin.h>
3395///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003396/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003397///
3398/// \param __a
3399/// A vector of [2 x i64]. The least significant 64 bits are moved to the
3400/// destination.
3401/// \returns A 64-bit signed integer containing the moved value.
Michael Kupersteine45af542015-06-30 13:36:19 +00003402static __inline__ long long __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003403_mm_cvtsi128_si64(__m128i __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003404{
David Blaikie3302f2b2013-01-16 23:08:36 +00003405 return __a[0];
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003406}
3407#endif
3408
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003409/// \brief Moves packed integer values from an aligned 128-bit memory location
3410/// to elements in a 128-bit integer vector.
3411///
3412/// \headerfile <x86intrin.h>
3413///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003414/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003415///
3416/// \param __p
3417/// An aligned pointer to a memory location containing integer values.
3418/// \returns A 128-bit integer vector containing the moved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003419static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003420_mm_load_si128(__m128i const *__p)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003421{
David Blaikie3302f2b2013-01-16 23:08:36 +00003422 return *__p;
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003423}
3424
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003425/// \brief Moves packed integer values from an unaligned 128-bit memory location
3426/// to elements in a 128-bit integer vector.
3427///
3428/// \headerfile <x86intrin.h>
3429///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003430/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003431///
3432/// \param __p
3433/// A pointer to a memory location containing integer values.
3434/// \returns A 128-bit integer vector containing the moved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00003435static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003436_mm_loadu_si128(__m128i const *__p)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003437{
Bill Wendling502931f2011-05-13 00:11:39 +00003438 struct __loadu_si128 {
David Blaikie3302f2b2013-01-16 23:08:36 +00003439 __m128i __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00003440 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00003441 return ((struct __loadu_si128*)__p)->__v;
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003442}
3443
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003444/// \brief Returns a vector of [2 x i64] where the lower element is taken from
3445/// the lower element of the operand, and the upper element is zero.
3446///
3447/// \headerfile <x86intrin.h>
3448///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003449/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003450///
3451/// \param __p
3452/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
3453/// the destination.
3454/// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
3455/// moved value. The higher order bits are cleared.
Michael Kupersteine45af542015-06-30 13:36:19 +00003456static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003457_mm_loadl_epi64(__m128i const *__p)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003458{
Eli Friedman9bb51ad2011-09-15 23:15:27 +00003459 struct __mm_loadl_epi64_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00003460 long long __u;
Eli Friedman9bb51ad2011-09-15 23:15:27 +00003461 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00003462 return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003463}
3464
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003465/// \brief Generates a 128-bit vector of [4 x i32] with unspecified content.
3466/// This could be used as an argument to another intrinsic function where the
3467/// argument is required but the value is not actually used.
3468///
3469/// \headerfile <x86intrin.h>
3470///
3471/// This intrinsic has no corresponding instruction.
3472///
3473/// \returns A 128-bit vector of [4 x i32] with unspecified content.
Michael Kupersteine45af542015-06-30 13:36:19 +00003474static __inline__ __m128i __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +00003475_mm_undefined_si128(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +00003476{
3477 return (__m128i)__builtin_ia32_undef128();
3478}
3479
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003480/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3481/// the specified 64-bit integer values.
3482///
3483/// \headerfile <x86intrin.h>
3484///
3485/// This intrinsic is a utility function and does not correspond to a specific
3486/// instruction.
3487///
3488/// \param __q1
3489/// A 64-bit integer value used to initialize the upper 64 bits of the
3490/// destination vector of [2 x i64].
3491/// \param __q0
3492/// A 64-bit integer value used to initialize the lower 64 bits of the
3493/// destination vector of [2 x i64].
3494/// \returns An initialized 128-bit vector of [2 x i64] containing the values
3495/// provided in the operands.
Simon Pilgrim5aba9922015-08-26 21:17:12 +00003496static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003497_mm_set_epi64x(long long __q1, long long __q0)
Anders Carlssondfa31172009-09-18 17:03:55 +00003498{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003499 return (__m128i){ __q0, __q1 };
Anders Carlssondfa31172009-09-18 17:03:55 +00003500}
3501
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003502/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3503/// the specified 64-bit integer values.
3504///
3505/// \headerfile <x86intrin.h>
3506///
3507/// This intrinsic is a utility function and does not correspond to a specific
3508/// instruction.
3509///
3510/// \param __q1
3511/// A 64-bit integer value used to initialize the upper 64 bits of the
3512/// destination vector of [2 x i64].
3513/// \param __q0
3514/// A 64-bit integer value used to initialize the lower 64 bits of the
3515/// destination vector of [2 x i64].
3516/// \returns An initialized 128-bit vector of [2 x i64] containing the values
3517/// provided in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00003518static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003519_mm_set_epi64(__m64 __q1, __m64 __q0)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003520{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003521 return (__m128i){ (long long)__q0, (long long)__q1 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003522}
3523
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003524/// \brief Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
3525/// the specified 32-bit integer values.
3526///
3527/// \headerfile <x86intrin.h>
3528///
3529/// This intrinsic is a utility function and does not correspond to a specific
3530/// instruction.
3531///
3532/// \param __i3
3533/// A 32-bit integer value used to initialize bits [127:96] of the
3534/// destination vector.
3535/// \param __i2
3536/// A 32-bit integer value used to initialize bits [95:64] of the destination
3537/// vector.
3538/// \param __i1
3539/// A 32-bit integer value used to initialize bits [63:32] of the destination
3540/// vector.
3541/// \param __i0
3542/// A 32-bit integer value used to initialize bits [31:0] of the destination
3543/// vector.
3544/// \returns An initialized 128-bit vector of [4 x i32] containing the values
3545/// provided in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00003546static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003547_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003548{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003549 return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003550}
3551
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003552/// \brief Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
3553/// the specified 16-bit integer values.
3554///
3555/// \headerfile <x86intrin.h>
3556///
3557/// This intrinsic is a utility function and does not correspond to a specific
3558/// instruction.
3559///
3560/// \param __w7
3561/// A 16-bit integer value used to initialize bits [127:112] of the
3562/// destination vector.
3563/// \param __w6
3564/// A 16-bit integer value used to initialize bits [111:96] of the
3565/// destination vector.
3566/// \param __w5
3567/// A 16-bit integer value used to initialize bits [95:80] of the destination
3568/// vector.
3569/// \param __w4
3570/// A 16-bit integer value used to initialize bits [79:64] of the destination
3571/// vector.
3572/// \param __w3
3573/// A 16-bit integer value used to initialize bits [63:48] of the destination
3574/// vector.
3575/// \param __w2
3576/// A 16-bit integer value used to initialize bits [47:32] of the destination
3577/// vector.
3578/// \param __w1
3579/// A 16-bit integer value used to initialize bits [31:16] of the destination
3580/// vector.
3581/// \param __w0
3582/// A 16-bit integer value used to initialize bits [15:0] of the destination
3583/// vector.
3584/// \returns An initialized 128-bit vector of [8 x i16] containing the values
3585/// provided in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00003586static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003587_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003588{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003589 return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003590}
3591
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003592/// \brief Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
3593/// the specified 8-bit integer values.
3594///
3595/// \headerfile <x86intrin.h>
3596///
3597/// This intrinsic is a utility function and does not correspond to a specific
3598/// instruction.
3599///
3600/// \param __b15
3601/// Initializes bits [127:120] of the destination vector.
3602/// \param __b14
3603/// Initializes bits [119:112] of the destination vector.
3604/// \param __b13
3605/// Initializes bits [111:104] of the destination vector.
3606/// \param __b12
3607/// Initializes bits [103:96] of the destination vector.
3608/// \param __b11
3609/// Initializes bits [95:88] of the destination vector.
3610/// \param __b10
3611/// Initializes bits [87:80] of the destination vector.
3612/// \param __b9
3613/// Initializes bits [79:72] of the destination vector.
3614/// \param __b8
3615/// Initializes bits [71:64] of the destination vector.
3616/// \param __b7
3617/// Initializes bits [63:56] of the destination vector.
3618/// \param __b6
3619/// Initializes bits [55:48] of the destination vector.
3620/// \param __b5
3621/// Initializes bits [47:40] of the destination vector.
3622/// \param __b4
3623/// Initializes bits [39:32] of the destination vector.
3624/// \param __b3
3625/// Initializes bits [31:24] of the destination vector.
3626/// \param __b2
3627/// Initializes bits [23:16] of the destination vector.
3628/// \param __b1
3629/// Initializes bits [15:8] of the destination vector.
3630/// \param __b0
3631/// Initializes bits [7:0] of the destination vector.
3632/// \returns An initialized 128-bit vector of [16 x i8] containing the values
3633/// provided in the operands.
Michael Kupersteine45af542015-06-30 13:36:19 +00003634static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003635_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003636{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003637 return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003638}
3639
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003640/// \brief Initializes both values in a 128-bit integer vector with the
3641/// specified 64-bit integer value.
3642///
3643/// \headerfile <x86intrin.h>
3644///
3645/// This intrinsic is a utility function and does not correspond to a specific
3646/// instruction.
3647///
3648/// \param __q
3649/// Integer value used to initialize the elements of the destination integer
3650/// vector.
3651/// \returns An initialized 128-bit integer vector of [2 x i64] with both
3652/// elements containing the value provided in the operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00003653static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003654_mm_set1_epi64x(long long __q)
Anders Carlssondfa31172009-09-18 17:03:55 +00003655{
David Blaikie3302f2b2013-01-16 23:08:36 +00003656 return (__m128i){ __q, __q };
Anders Carlssondfa31172009-09-18 17:03:55 +00003657}
3658
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003659/// \brief Initializes both values in a 128-bit vector of [2 x i64] with the
3660/// specified 64-bit value.
3661///
3662/// \headerfile <x86intrin.h>
3663///
3664/// This intrinsic is a utility function and does not correspond to a specific
3665/// instruction.
3666///
3667/// \param __q
3668/// A 64-bit value used to initialize the elements of the destination integer
3669/// vector.
3670/// \returns An initialized 128-bit vector of [2 x i64] with all elements
3671/// containing the value provided in the operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00003672static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003673_mm_set1_epi64(__m64 __q)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003674{
David Blaikie3302f2b2013-01-16 23:08:36 +00003675 return (__m128i){ (long long)__q, (long long)__q };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003676}
3677
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003678/// \brief Initializes all values in a 128-bit vector of [4 x i32] with the
3679/// specified 32-bit value.
3680///
3681/// \headerfile <x86intrin.h>
3682///
3683/// This intrinsic is a utility function and does not correspond to a specific
3684/// instruction.
3685///
3686/// \param __i
3687/// A 32-bit value used to initialize the elements of the destination integer
3688/// vector.
3689/// \returns An initialized 128-bit vector of [4 x i32] with all elements
3690/// containing the value provided in the operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00003691static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003692_mm_set1_epi32(int __i)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003693{
David Blaikie3302f2b2013-01-16 23:08:36 +00003694 return (__m128i)(__v4si){ __i, __i, __i, __i };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003695}
3696
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003697/// \brief Initializes all values in a 128-bit vector of [8 x i16] with the
3698/// specified 16-bit value.
3699///
3700/// \headerfile <x86intrin.h>
3701///
3702/// This intrinsic is a utility function and does not correspond to a specific
3703/// instruction.
3704///
3705/// \param __w
3706/// A 16-bit value used to initialize the elements of the destination integer
3707/// vector.
3708/// \returns An initialized 128-bit vector of [8 x i16] with all elements
3709/// containing the value provided in the operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00003710static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003711_mm_set1_epi16(short __w)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003712{
David Blaikie3302f2b2013-01-16 23:08:36 +00003713 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003714}
3715
Ekaterina Romanovaf2ed6202016-04-08 20:45:48 +00003716/// \brief Initializes all values in a 128-bit vector of [16 x i8] with the
3717/// specified 8-bit value.
3718///
3719/// \headerfile <x86intrin.h>
3720///
3721/// This intrinsic is a utility function and does not correspond to a specific
3722/// instruction.
3723///
3724/// \param __b
3725/// An 8-bit value used to initialize the elements of the destination integer
3726/// vector.
3727/// \returns An initialized 128-bit vector of [16 x i8] with all elements
3728/// containing the value provided in the operand.
Michael Kupersteine45af542015-06-30 13:36:19 +00003729static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003730_mm_set1_epi8(char __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003731{
David Blaikie3302f2b2013-01-16 23:08:36 +00003732 return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003733}
3734
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003735/// \brief Constructs a 128-bit integer vector, initialized in reverse order
3736/// with the specified 64-bit integral values.
3737///
3738/// \headerfile <x86intrin.h>
3739///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00003740/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
3741/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003742///
3743/// \param __q0
3744/// A 64-bit integral value used to initialize the lower 64 bits of the
3745/// result.
3746/// \param __q1
3747/// A 64-bit integral value used to initialize the upper 64 bits of the
3748/// result.
3749/// \returns An initialized 128-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00003750static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003751_mm_setr_epi64(__m64 __q0, __m64 __q1)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003752{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003753 return (__m128i){ (long long)__q0, (long long)__q1 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003754}
3755
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003756/// \brief Constructs a 128-bit integer vector, initialized in reverse order
3757/// with the specified 32-bit integral values.
3758///
3759/// \headerfile <x86intrin.h>
3760///
3761/// This intrinsic is a utility function and does not correspond to a specific
3762/// instruction.
3763///
3764/// \param __i0
3765/// A 32-bit integral value used to initialize bits [31:0] of the result.
3766/// \param __i1
3767/// A 32-bit integral value used to initialize bits [63:32] of the result.
3768/// \param __i2
3769/// A 32-bit integral value used to initialize bits [95:64] of the result.
3770/// \param __i3
3771/// A 32-bit integral value used to initialize bits [127:96] of the result.
3772/// \returns An initialized 128-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00003773static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003774_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003775{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003776 return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003777}
3778
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003779/// \brief Constructs a 128-bit integer vector, initialized in reverse order
3780/// with the specified 16-bit integral values.
3781///
3782/// \headerfile <x86intrin.h>
3783///
3784/// This intrinsic is a utility function and does not correspond to a specific
3785/// instruction.
3786///
3787/// \param __w0
3788/// A 16-bit integral value used to initialize bits [15:0] of the result.
3789/// \param __w1
3790/// A 16-bit integral value used to initialize bits [31:16] of the result.
3791/// \param __w2
3792/// A 16-bit integral value used to initialize bits [47:32] of the result.
3793/// \param __w3
3794/// A 16-bit integral value used to initialize bits [63:48] of the result.
3795/// \param __w4
3796/// A 16-bit integral value used to initialize bits [79:64] of the result.
3797/// \param __w5
3798/// A 16-bit integral value used to initialize bits [95:80] of the result.
3799/// \param __w6
3800/// A 16-bit integral value used to initialize bits [111:96] of the result.
3801/// \param __w7
3802/// A 16-bit integral value used to initialize bits [127:112] of the result.
3803/// \returns An initialized 128-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00003804static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003805_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003806{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003807 return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003808}
3809
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003810/// \brief Constructs a 128-bit integer vector, initialized in reverse order
3811/// with the specified 8-bit integral values.
3812///
3813/// \headerfile <x86intrin.h>
3814///
3815/// This intrinsic is a utility function and does not correspond to a specific
3816/// instruction.
3817///
3818/// \param __b0
3819/// An 8-bit integral value used to initialize bits [7:0] of the result.
3820/// \param __b1
3821/// An 8-bit integral value used to initialize bits [15:8] of the result.
3822/// \param __b2
3823/// An 8-bit integral value used to initialize bits [23:16] of the result.
3824/// \param __b3
3825/// An 8-bit integral value used to initialize bits [31:24] of the result.
3826/// \param __b4
3827/// An 8-bit integral value used to initialize bits [39:32] of the result.
3828/// \param __b5
3829/// An 8-bit integral value used to initialize bits [47:40] of the result.
3830/// \param __b6
3831/// An 8-bit integral value used to initialize bits [55:48] of the result.
3832/// \param __b7
3833/// An 8-bit integral value used to initialize bits [63:56] of the result.
3834/// \param __b8
3835/// An 8-bit integral value used to initialize bits [71:64] of the result.
3836/// \param __b9
3837/// An 8-bit integral value used to initialize bits [79:72] of the result.
3838/// \param __b10
3839/// An 8-bit integral value used to initialize bits [87:80] of the result.
3840/// \param __b11
3841/// An 8-bit integral value used to initialize bits [95:88] of the result.
3842/// \param __b12
3843/// An 8-bit integral value used to initialize bits [103:96] of the result.
3844/// \param __b13
3845/// An 8-bit integral value used to initialize bits [111:104] of the result.
3846/// \param __b14
3847/// An 8-bit integral value used to initialize bits [119:112] of the result.
3848/// \param __b15
3849/// An 8-bit integral value used to initialize bits [127:120] of the result.
3850/// \returns An initialized 128-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00003851static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003852_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003853{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003854 return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003855}
3856
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003857/// \brief Creates a 128-bit integer vector initialized to zero.
3858///
3859/// \headerfile <x86intrin.h>
3860///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003861/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003862///
3863/// \returns An initialized 128-bit integer vector with all elements set to
3864/// zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00003865static __inline__ __m128i __DEFAULT_FN_ATTRS
Mike Stump5b31ed32009-02-13 14:24:50 +00003866_mm_setzero_si128(void)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003867{
3868 return (__m128i){ 0LL, 0LL };
3869}
3870
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003871/// \brief Stores a 128-bit integer vector to a memory location aligned on a
3872/// 128-bit boundary.
3873///
3874/// \headerfile <x86intrin.h>
3875///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003876/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003877///
3878/// \param __p
3879/// A pointer to an aligned memory location that will receive the integer
3880/// values.
3881/// \param __b
3882/// A 128-bit integer vector containing the values to be moved.
Michael Kupersteine45af542015-06-30 13:36:19 +00003883static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003884_mm_store_si128(__m128i *__p, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003885{
David Blaikie3302f2b2013-01-16 23:08:36 +00003886 *__p = __b;
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003887}
3888
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003889/// \brief Stores a 128-bit integer vector to an unaligned memory location.
3890///
3891/// \headerfile <x86intrin.h>
3892///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003893/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003894///
3895/// \param __p
3896/// A pointer to a memory location that will receive the integer values.
3897/// \param __b
3898/// A 128-bit integer vector containing the values to be moved.
Michael Kupersteine45af542015-06-30 13:36:19 +00003899static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003900_mm_storeu_si128(__m128i *__p, __m128i __b)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003901{
Craig Topper09175da2016-05-30 17:10:30 +00003902 struct __storeu_si128 {
3903 __m128i __v;
3904 } __attribute__((__packed__, __may_alias__));
3905 ((struct __storeu_si128*)__p)->__v = __b;
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003906}
3907
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003908/// \brief Moves bytes selected by the mask from the first operand to the
3909/// specified unaligned memory location. When a mask bit is 1, the
3910/// corresponding byte is written, otherwise it is not written. To minimize
3911/// caching, the date is flagged as non-temporal (unlikely to be used again
3912/// soon). Exception and trap behavior for elements not selected for storage
3913/// to memory are implementation dependent.
3914///
3915/// \headerfile <x86intrin.h>
3916///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00003917/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>
3918/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003919///
3920/// \param __d
3921/// A 128-bit integer vector containing the values to be moved.
3922/// \param __n
3923/// A 128-bit integer vector containing the mask. The most significant bit of
3924/// each byte represents the mask bits.
3925/// \param __p
3926/// A pointer to an unaligned 128-bit memory location where the specified
3927/// values are moved.
Michael Kupersteine45af542015-06-30 13:36:19 +00003928static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003929_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003930{
David Blaikie3302f2b2013-01-16 23:08:36 +00003931 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003932}
3933
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003934/// \brief Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
3935/// a memory location.
3936///
3937/// \headerfile <x86intrin.h>
3938///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003939/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003940///
3941/// \param __p
3942/// A pointer to a 64-bit memory location that will receive the lower 64 bits
3943/// of the integer vector parameter.
3944/// \param __a
3945/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the
3946/// value to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00003947static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003948_mm_storel_epi64(__m128i *__p, __m128i __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003949{
Chad Rosier87622b82012-05-01 18:11:51 +00003950 struct __mm_storel_epi64_struct {
David Blaikie3302f2b2013-01-16 23:08:36 +00003951 long long __u;
Chad Rosier87622b82012-05-01 18:11:51 +00003952 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00003953 ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003954}
3955
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003956/// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit
3957/// aligned memory location. To minimize caching, the data is flagged as
3958/// non-temporal (unlikely to be used again soon).
3959///
3960/// \headerfile <x86intrin.h>
3961///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003962/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003963///
3964/// \param __p
3965/// A pointer to the 128-bit aligned memory location used to store the value.
3966/// \param __a
3967/// A vector of [2 x double] containing the 64-bit values to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00003968static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003969_mm_stream_pd(double *__p, __m128d __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003970{
Simon Pilgrimbeca5f22016-06-13 09:57:52 +00003971 __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003972}
3973
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003974/// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location.
3975/// To minimize caching, the data is flagged as non-temporal (unlikely to be
3976/// used again soon).
3977///
3978/// \headerfile <x86intrin.h>
3979///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003980/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003981///
3982/// \param __p
3983/// A pointer to the 128-bit aligned memory location used to store the value.
3984/// \param __a
3985/// A 128-bit integer vector containing the values to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00003986static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00003987_mm_stream_si128(__m128i *__p, __m128i __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003988{
Simon Pilgrimbeca5f22016-06-13 09:57:52 +00003989 __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00003990}
3991
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003992/// \brief Stores a 32-bit integer value in the specified memory location. To
3993/// minimize caching, the data is flagged as non-temporal (unlikely to be
3994/// used again soon).
3995///
3996/// \headerfile <x86intrin.h>
3997///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00003998/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00003999///
4000/// \param __p
4001/// A pointer to the 32-bit memory location used to store the value.
4002/// \param __a
4003/// A 32-bit integer containing the value to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00004004static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004005_mm_stream_si32(int *__p, int __a)
Anders Carlssona0d5ca22008-12-25 23:48:58 +00004006{
David Blaikie3302f2b2013-01-16 23:08:36 +00004007 __builtin_ia32_movnti(__p, __a);
Anders Carlssona0d5ca22008-12-25 23:48:58 +00004008}
4009
Eli Friedmanf9d8c6c2013-09-23 23:38:39 +00004010#ifdef __x86_64__
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004011/// \brief Stores a 64-bit integer value in the specified memory location. To
4012/// minimize caching, the data is flagged as non-temporal (unlikely to be
4013/// used again soon).
4014///
4015/// \headerfile <x86intrin.h>
4016///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004017/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004018///
4019/// \param __p
4020/// A pointer to the 64-bit memory location used to store the value.
4021/// \param __a
4022/// A 64-bit integer containing the value to be stored.
Michael Kupersteine45af542015-06-30 13:36:19 +00004023static __inline__ void __DEFAULT_FN_ATTRS
Eli Friedmanf9d8c6c2013-09-23 23:38:39 +00004024_mm_stream_si64(long long *__p, long long __a)
4025{
4026 __builtin_ia32_movnti64(__p, __a);
4027}
4028#endif
4029
/* The following intrinsics are declared only (no inline body), with C
   linkage when the header is compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif

/// \brief Flushes and invalidates the cache line containing \a __p from all
///    caches in the coherency domain.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.
///
/// \param __p
///    A pointer to the memory location used to identify the cache line to be
///    flushed.
void _mm_clflush(const void *__p);

/// \brief Forces strong memory ordering (serialization) between load
///    instructions preceding this instruction and load instructions following
///    this instruction, ensuring the system completes all previous loads
///    before executing subsequent loads.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LFENCE </c> instruction.
///
void _mm_lfence(void);

/// \brief Forces strong memory ordering (serialization) between load and
///    store instructions preceding this instruction and load and store
///    instructions following this instruction, ensuring that the system
///    completes all previous memory accesses before executing subsequent
///    memory accesses.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MFENCE </c> instruction.
///
void _mm_mfence(void);

#ifdef __cplusplus
} // extern "C"
#endif
Anders Carlssona0d5ca22008-12-25 23:48:58 +00004071
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004072/// \brief Converts 16-bit signed integers from both 128-bit integer vector
4073/// operands into 8-bit signed integers, and packs the results into the
4074/// destination. Positive values greater than 0x7F are saturated to 0x7F.
4075/// Negative values less than 0x80 are saturated to 0x80.
4076///
4077/// \headerfile <x86intrin.h>
4078///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004079/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004080///
4081/// \param __a
4082/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4083/// a signed integer and is converted to a 8-bit signed integer with
4084/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less
4085/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are
4086/// written to the lower 64 bits of the result.
4087/// \param __b
4088/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4089/// a signed integer and is converted to a 8-bit signed integer with
4090/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less
4091/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are
4092/// written to the higher 64 bits of the result.
4093/// \returns A 128-bit vector of [16 x i8] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004094static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004095_mm_packs_epi16(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004096{
David Blaikie3302f2b2013-01-16 23:08:36 +00004097 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004098}
4099
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004100/// \brief Converts 32-bit signed integers from both 128-bit integer vector
4101/// operands into 16-bit signed integers, and packs the results into the
4102/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.
4103/// Negative values less than 0x8000 are saturated to 0x8000.
4104///
4105/// \headerfile <x86intrin.h>
4106///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004107/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004108///
4109/// \param __a
4110/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
4111/// a signed integer and is converted to a 16-bit signed integer with
4112/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
4113/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
4114/// are written to the lower 64 bits of the result.
4115/// \param __b
4116/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
4117/// a signed integer and is converted to a 16-bit signed integer with
4118/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
4119/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
4120/// are written to the higher 64 bits of the result.
4121/// \returns A 128-bit vector of [8 x i16] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004122static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004123_mm_packs_epi32(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004124{
David Blaikie3302f2b2013-01-16 23:08:36 +00004125 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004126}
4127
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004128/// \brief Converts 16-bit signed integers from both 128-bit integer vector
4129/// operands into 8-bit unsigned integers, and packs the results into the
4130/// destination. Values greater than 0xFF are saturated to 0xFF. Values less
4131/// than 0x00 are saturated to 0x00.
4132///
4133/// \headerfile <x86intrin.h>
4134///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004135/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004136///
4137/// \param __a
4138/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4139/// a signed integer and is converted to an 8-bit unsigned integer with
4140/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
4141/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are
4142/// written to the lower 64 bits of the result.
4143/// \param __b
4144/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4145/// a signed integer and is converted to an 8-bit unsigned integer with
4146/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
4147/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are
4148/// written to the higher 64 bits of the result.
4149/// \returns A 128-bit vector of [16 x i8] containing the converted values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004150static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004151_mm_packus_epi16(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004152{
David Blaikie3302f2b2013-01-16 23:08:36 +00004153 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004154}
4155
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004156/// \brief Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
4157/// the immediate-value parameter as a selector.
4158///
4159/// \headerfile <x86intrin.h>
4160///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004161/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004162///
4163/// \param __a
4164/// A 128-bit integer vector.
4165/// \param __imm
Ekaterina Romanova2e041c92017-01-13 01:14:08 +00004166/// An immediate value. Bits [2:0] selects values from \a __a to be assigned
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004167/// to bits[15:0] of the result. \n
4168/// 000: assign values from bits [15:0] of \a __a. \n
4169/// 001: assign values from bits [31:16] of \a __a. \n
4170/// 010: assign values from bits [47:32] of \a __a. \n
4171/// 011: assign values from bits [63:48] of \a __a. \n
4172/// 100: assign values from bits [79:64] of \a __a. \n
4173/// 101: assign values from bits [95:80] of \a __a. \n
4174/// 110: assign values from bits [111:96] of \a __a. \n
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00004175/// 111: assign values from bits [127:112] of \a __a.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004176/// \returns An integer, whose lower 16 bits are selected from the 128-bit
4177/// integer vector parameter and the remaining bits are assigned zeros.
Michael Kupersteine45af542015-06-30 13:36:19 +00004178static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004179_mm_extract_epi16(__m128i __a, int __imm)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004180{
David Blaikie3302f2b2013-01-16 23:08:36 +00004181 __v8hi __b = (__v8hi)__a;
Manman Renbe38b9e2013-10-22 19:24:42 +00004182 return (unsigned short)__b[__imm & 7];
Anders Carlsson85eb1242008-12-26 00:45:50 +00004183}
4184
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004185/// \brief Constructs a 128-bit integer vector by first making a copy of the
4186/// 128-bit integer vector parameter, and then inserting the lower 16 bits
4187/// of an integer parameter into an offset specified by the immediate-value
4188/// parameter.
4189///
4190/// \headerfile <x86intrin.h>
4191///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004192/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004193///
4194/// \param __a
4195/// A 128-bit integer vector of [8 x i16]. This vector is copied to the
4196/// result and then one of the eight elements in the result is replaced by
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00004197/// the lower 16 bits of \a __b.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004198/// \param __b
4199/// An integer. The lower 16 bits of this parameter are written to the
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00004200/// result beginning at an offset specified by \a __imm.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004201/// \param __imm
4202/// An immediate value specifying the bit offset in the result at which the
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00004203/// lower 16 bits of \a __b are written.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004204/// \returns A 128-bit integer vector containing the constructed values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004205static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004206_mm_insert_epi16(__m128i __a, int __b, int __imm)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004207{
David Blaikie3302f2b2013-01-16 23:08:36 +00004208 __v8hi __c = (__v8hi)__a;
4209 __c[__imm & 7] = __b;
4210 return (__m128i)__c;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004211}
4212
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004213/// \brief Copies the values of the most significant bits from each 8-bit
4214/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
4215/// value, zero-extends the value, and writes it to the destination.
4216///
4217/// \headerfile <x86intrin.h>
4218///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004219/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004220///
4221/// \param __a
4222/// A 128-bit integer vector containing the values with bits to be extracted.
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00004223/// \returns The most significant bits from each 8-bit element in \a __a,
4224/// written to bits [15:0]. The other bits are assigned zeros.
Michael Kupersteine45af542015-06-30 13:36:19 +00004225static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004226_mm_movemask_epi8(__m128i __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004227{
David Blaikie3302f2b2013-01-16 23:08:36 +00004228 return __builtin_ia32_pmovmskb128((__v16qi)__a);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004229}
4230
/// \brief Constructs a 128-bit integer vector by shuffling the four 32-bit
///    elements of a 128-bit integer vector parameter, using the
///    immediate-value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the values to be copied.
/// \param imm
///    An immediate value containing an 8-bit value specifying which elements
///    to copy from \a a. Each 2-bit field selects the source element for one
///    32-bit element of the 128-bit result: \n
///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [31:0] of \a a. \n
///    01: assign values from bits [63:32] of \a a. \n
///    10: assign values from bits [95:64] of \a a. \n
///    11: assign values from bits [127:96] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v4si)(__m128i)(a), \
      (__v4si)_mm_undefined_si128(), \
      ((imm) >> 0) & 0x3, \
      ((imm) >> 2) & 0x3, \
      ((imm) >> 4) & 0x3, \
      ((imm) >> 6) & 0x3); })
Chris Lattnerf03406f2011-04-25 20:42:40 +00004264
/// \brief Builds a 128-bit integer vector of [8 x i16] whose four lower
///    16-bit elements are chosen from the four lower elements of the source
///    by an immediate selector; the four upper elements are copied unchanged.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
///    [127:64] of the result.
/// \param imm
///    An 8-bit immediate made of four 2-bit selectors: \n
///    Bits [1:0] select the source for bits [15:0] of the result. \n
///    Bits [3:2] select the source for bits [31:16] of the result. \n
///    Bits [5:4] select the source for bits [47:32] of the result. \n
///    Bits [7:6] select the source for bits [63:48] of the result. \n
///    Selector encoding: \n
///    00: copy bits [15:0] of \a a. \n
///    01: copy bits [31:16] of \a a. \n
///    10: copy bits [47:32] of \a a. \n
///    11: copy bits [63:48] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)(__m128i)(a), \
      (__v8hi)_mm_undefined_si128(), \
      (imm) & 0x3, \
      ((imm) >> 2) & 0x3, \
      ((imm) >> 4) & 0x3, \
      ((imm) >> 6) & 0x3, \
      4, 5, 6, 7); })
Chris Lattnerf03406f2011-04-25 20:42:40 +00004298
/// \brief Builds a 128-bit integer vector of [8 x i16] whose four upper
///    16-bit elements are chosen from the four upper elements of the source
///    by an immediate selector; the four lower elements are copied unchanged.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
///    [63:0] of the result.
/// \param imm
///    An 8-bit immediate made of four 2-bit selectors: \n
///    Bits [1:0] select the source for bits [79:64] of the result. \n
///    Bits [3:2] select the source for bits [95:80] of the result. \n
///    Bits [5:4] select the source for bits [111:96] of the result. \n
///    Bits [7:6] select the source for bits [127:112] of the result. \n
///    Selector encoding: \n
///    00: copy bits [79:64] of \a a. \n
///    01: copy bits [95:80] of \a a. \n
///    10: copy bits [111:96] of \a a. \n
///    11: copy bits [127:112] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)(__m128i)(a), \
      (__v8hi)_mm_undefined_si128(), \
      0, 1, 2, 3, \
      ((imm) & 0x3) + 4, \
      (((imm) >> 2) & 0x3) + 4, \
      (((imm) >> 4) & 0x3) + 4, \
      (((imm) >> 6) & 0x3) + 4); })
Anders Carlsson85eb1242008-12-26 00:45:50 +00004334
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004335/// \brief Unpacks the high-order (index 8-15) values from two 128-bit vectors
4336/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4337///
4338/// \headerfile <x86intrin.h>
4339///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004340/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
4341/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004342///
4343/// \param __a
4344/// A 128-bit vector of [16 x i8].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004345/// Bits [71:64] are written to bits [7:0] of the result. \n
4346/// Bits [79:72] are written to bits [23:16] of the result. \n
4347/// Bits [87:80] are written to bits [39:32] of the result. \n
4348/// Bits [95:88] are written to bits [55:48] of the result. \n
4349/// Bits [103:96] are written to bits [71:64] of the result. \n
4350/// Bits [111:104] are written to bits [87:80] of the result. \n
4351/// Bits [119:112] are written to bits [103:96] of the result. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004352/// Bits [127:120] are written to bits [119:112] of the result.
4353/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004354/// A 128-bit vector of [16 x i8]. \n
4355/// Bits [71:64] are written to bits [15:8] of the result. \n
4356/// Bits [79:72] are written to bits [31:24] of the result. \n
4357/// Bits [87:80] are written to bits [47:40] of the result. \n
4358/// Bits [95:88] are written to bits [63:56] of the result. \n
4359/// Bits [103:96] are written to bits [79:72] of the result. \n
4360/// Bits [111:104] are written to bits [95:88] of the result. \n
4361/// Bits [119:112] are written to bits [111:104] of the result. \n
4362/// Bits [127:120] are written to bits [127:120] of the result.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004363/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004364static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004365_mm_unpackhi_epi8(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004366{
David Blaikie3302f2b2013-01-16 23:08:36 +00004367 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004368}
4369
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004370/// \brief Unpacks the high-order (index 4-7) values from two 128-bit vectors of
4371/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
4372///
4373/// \headerfile <x86intrin.h>
4374///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004375/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>
4376/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004377///
4378/// \param __a
4379/// A 128-bit vector of [8 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004380/// Bits [79:64] are written to bits [15:0] of the result. \n
4381/// Bits [95:80] are written to bits [47:32] of the result. \n
4382/// Bits [111:96] are written to bits [79:64] of the result. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004383/// Bits [127:112] are written to bits [111:96] of the result.
4384/// \param __b
4385/// A 128-bit vector of [8 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004386/// Bits [79:64] are written to bits [31:16] of the result. \n
4387/// Bits [95:80] are written to bits [63:48] of the result. \n
4388/// Bits [111:96] are written to bits [95:80] of the result. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004389/// Bits [127:112] are written to bits [127:112] of the result.
4390/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004391static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004392_mm_unpackhi_epi16(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004393{
David Blaikie3302f2b2013-01-16 23:08:36 +00004394 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004395}
4396
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004397/// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of
4398/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4399///
4400/// \headerfile <x86intrin.h>
4401///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004402/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>
4403/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004404///
4405/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004406/// A 128-bit vector of [4 x i32]. \n
4407/// Bits [95:64] are written to bits [31:0] of the destination. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004408/// Bits [127:96] are written to bits [95:64] of the destination.
4409/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004410/// A 128-bit vector of [4 x i32]. \n
4411/// Bits [95:64] are written to bits [64:32] of the destination. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004412/// Bits [127:96] are written to bits [127:96] of the destination.
4413/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004414static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004415_mm_unpackhi_epi32(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004416{
David Blaikie3302f2b2013-01-16 23:08:36 +00004417 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004418}
4419
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004420/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
4421/// of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4422///
4423/// \headerfile <x86intrin.h>
4424///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004425/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>
4426/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004427///
4428/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004429/// A 128-bit vector of [2 x i64]. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004430/// Bits [127:64] are written to bits [63:0] of the destination.
4431/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004432/// A 128-bit vector of [2 x i64]. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004433/// Bits [127:64] are written to bits [127:64] of the destination.
4434/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004435static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004436_mm_unpackhi_epi64(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004437{
Craig Topper1aa231e2016-05-16 06:38:42 +00004438 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004439}
4440
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004441/// \brief Unpacks the low-order (index 0-7) values from two 128-bit vectors of
4442/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4443///
4444/// \headerfile <x86intrin.h>
4445///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004446/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
4447/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004448///
4449/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004450/// A 128-bit vector of [16 x i8]. \n
4451/// Bits [7:0] are written to bits [7:0] of the result. \n
4452/// Bits [15:8] are written to bits [23:16] of the result. \n
4453/// Bits [23:16] are written to bits [39:32] of the result. \n
4454/// Bits [31:24] are written to bits [55:48] of the result. \n
4455/// Bits [39:32] are written to bits [71:64] of the result. \n
4456/// Bits [47:40] are written to bits [87:80] of the result. \n
4457/// Bits [55:48] are written to bits [103:96] of the result. \n
4458/// Bits [63:56] are written to bits [119:112] of the result.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004459/// \param __b
4460/// A 128-bit vector of [16 x i8].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004461/// Bits [7:0] are written to bits [15:8] of the result. \n
4462/// Bits [15:8] are written to bits [31:24] of the result. \n
4463/// Bits [23:16] are written to bits [47:40] of the result. \n
4464/// Bits [31:24] are written to bits [63:56] of the result. \n
4465/// Bits [39:32] are written to bits [79:72] of the result. \n
4466/// Bits [47:40] are written to bits [95:88] of the result. \n
4467/// Bits [55:48] are written to bits [111:104] of the result. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004468/// Bits [63:56] are written to bits [127:120] of the result.
4469/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004470static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004471_mm_unpacklo_epi8(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004472{
David Blaikie3302f2b2013-01-16 23:08:36 +00004473 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004474}
4475
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004476/// \brief Unpacks the low-order (index 0-3) values from each of the two 128-bit
4477/// vectors of [8 x i16] and interleaves them into a 128-bit vector of
4478/// [8 x i16].
4479///
4480/// \headerfile <x86intrin.h>
4481///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004482/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>
4483/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004484///
4485/// \param __a
4486/// A 128-bit vector of [8 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004487/// Bits [15:0] are written to bits [15:0] of the result. \n
4488/// Bits [31:16] are written to bits [47:32] of the result. \n
4489/// Bits [47:32] are written to bits [79:64] of the result. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004490/// Bits [63:48] are written to bits [111:96] of the result.
4491/// \param __b
4492/// A 128-bit vector of [8 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004493/// Bits [15:0] are written to bits [31:16] of the result. \n
4494/// Bits [31:16] are written to bits [63:48] of the result. \n
4495/// Bits [47:32] are written to bits [95:80] of the result. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004496/// Bits [63:48] are written to bits [127:112] of the result.
4497/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004498static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004499_mm_unpacklo_epi16(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004500{
David Blaikie3302f2b2013-01-16 23:08:36 +00004501 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004502}
4503
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004504/// \brief Unpacks the low-order (index 0,1) values from two 128-bit vectors of
4505/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4506///
4507/// \headerfile <x86intrin.h>
4508///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004509/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>
4510/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004511///
4512/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004513/// A 128-bit vector of [4 x i32]. \n
4514/// Bits [31:0] are written to bits [31:0] of the destination. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004515/// Bits [63:32] are written to bits [95:64] of the destination.
4516/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004517/// A 128-bit vector of [4 x i32]. \n
4518/// Bits [31:0] are written to bits [64:32] of the destination. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004519/// Bits [63:32] are written to bits [127:96] of the destination.
4520/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004521static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004522_mm_unpacklo_epi32(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004523{
David Blaikie3302f2b2013-01-16 23:08:36 +00004524 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004525}
4526
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004527/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors of
4528/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4529///
4530/// \headerfile <x86intrin.h>
4531///
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004532/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
4533/// instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004534///
4535/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004536/// A 128-bit vector of [2 x i64]. \n
4537/// Bits [63:0] are written to bits [63:0] of the destination. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004538/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004539/// A 128-bit vector of [2 x i64]. \n
4540/// Bits [63:0] are written to bits [127:64] of the destination. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004541/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004542static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004543_mm_unpacklo_epi64(__m128i __a, __m128i __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004544{
Craig Topper1aa231e2016-05-16 06:38:42 +00004545 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004546}
4547
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004548/// \brief Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
Ekaterina Romanova493091f2016-10-20 17:59:15 +00004549/// integer.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004550///
4551/// \headerfile <x86intrin.h>
4552///
4553/// This intrinsic has no corresponding instruction.
4554///
4555/// \param __a
4556/// A 128-bit integer vector operand. The lower 64 bits are moved to the
4557/// destination.
4558/// \returns A 64-bit integer containing the lower 64 bits of the parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004559static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004560_mm_movepi64_pi64(__m128i __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004561{
David Blaikie3302f2b2013-01-16 23:08:36 +00004562 return (__m64)__a[0];
Anders Carlsson85eb1242008-12-26 00:45:50 +00004563}
4564
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004565/// \brief Moves the 64-bit operand to a 128-bit integer vector, zeroing the
4566/// upper bits.
4567///
4568/// \headerfile <x86intrin.h>
4569///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004570/// This intrinsic corresponds to the <c> VMOVQ / MOVQ / MOVD </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004571///
4572/// \param __a
4573/// A 64-bit value.
4574/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4575/// the operand. The upper 64 bits are assigned zeros.
Michael Kupersteine45af542015-06-30 13:36:19 +00004576static __inline__ __m128i __DEFAULT_FN_ATTRS
Alp Tokerd480b1b2013-11-23 22:11:57 +00004577_mm_movpi64_epi64(__m64 __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004578{
David Blaikie3302f2b2013-01-16 23:08:36 +00004579 return (__m128i){ (long long)__a, 0 };
Anders Carlsson85eb1242008-12-26 00:45:50 +00004580}
4581
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004582/// \brief Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
4583/// integer vector, zeroing the upper bits.
4584///
4585/// \headerfile <x86intrin.h>
4586///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004587/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004588///
4589/// \param __a
4590/// A 128-bit integer vector operand. The lower 64 bits are moved to the
4591/// destination.
4592/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4593/// the operand. The upper 64 bits are assigned zeros.
Michael Kupersteine45af542015-06-30 13:36:19 +00004594static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004595_mm_move_epi64(__m128i __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004596{
Craig Topper1aa231e2016-05-16 06:38:42 +00004597 return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004598}
4599
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004600/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
4601/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
4602/// double].
4603///
4604/// \headerfile <x86intrin.h>
4605///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004606/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004607///
4608/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004609/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004610/// Bits [127:64] are written to bits [63:0] of the destination.
4611/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004612/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004613/// Bits [127:64] are written to bits [127:64] of the destination.
4614/// \returns A 128-bit vector of [2 x double] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004615static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004616_mm_unpackhi_pd(__m128d __a, __m128d __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004617{
Craig Topper1aa231e2016-05-16 06:38:42 +00004618 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004619}
4620
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004621/// \brief Unpacks the low-order (even-indexed) values from two 128-bit vectors
4622/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
4623/// double].
4624///
4625/// \headerfile <x86intrin.h>
4626///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004627/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004628///
4629/// \param __a
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004630/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004631/// Bits [63:0] are written to bits [63:0] of the destination.
4632/// \param __b
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +00004633/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004634/// Bits [63:0] are written to bits [127:64] of the destination.
4635/// \returns A 128-bit vector of [2 x double] containing the interleaved values.
Michael Kupersteine45af542015-06-30 13:36:19 +00004636static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004637_mm_unpacklo_pd(__m128d __a, __m128d __b)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004638{
Craig Topper1aa231e2016-05-16 06:38:42 +00004639 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004640}
4641
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004642/// \brief Extracts the sign bits of the double-precision values in the 128-bit
4643/// vector of [2 x double], zero-extends the value, and writes it to the
4644/// low-order bits of the destination.
4645///
4646/// \headerfile <x86intrin.h>
4647///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00004648/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004649///
4650/// \param __a
4651/// A 128-bit vector of [2 x double] containing the values with sign bits to
4652/// be extracted.
Ekaterina Romanova797b0eb2016-12-08 22:10:51 +00004653/// \returns The sign bits from each of the double-precision elements in \a __a,
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004654/// written to bits [1:0]. The remaining bits are assigned values of zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00004655static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00004656_mm_movemask_pd(__m128d __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004657{
Craig Topper1aa231e2016-05-16 06:38:42 +00004658 return __builtin_ia32_movmskpd((__v2df)__a);
Anders Carlsson85eb1242008-12-26 00:45:50 +00004659}
4660
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004661
/// \brief Builds a 128-bit floating-point vector of [2 x double] by taking
///    one element from each of two 128-bit vectors of [2 x double], as
///    directed by an immediate selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);
/// \endcode
///
/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.
///
/// \param a
///    A 128-bit vector of [2 x double]. Supplies the lower result element.
/// \param b
///    A 128-bit vector of [2 x double]. Supplies the upper result element.
/// \param i
///    An 8-bit immediate value. Its two least significant bits specify which
///    elements to copy from \a a and \a b: \n
///    Bit[0] = 0: lower element of \a a copied to lower element of result. \n
///    Bit[0] = 1: upper element of \a a copied to lower element of result. \n
///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
///    Bit[1] = 1: upper element of \a b copied to upper element of result.
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      ((i) & 0x1), \
      (((i) >> 1) & 0x1) + 2); })
Anders Carlsson85eb1242008-12-26 00:45:50 +00004690
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004691/// \brief Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4692/// floating-point vector of [4 x float].
4693///
4694/// \headerfile <x86intrin.h>
4695///
4696/// This intrinsic has no corresponding instruction.
4697///
4698/// \param __a
4699/// A 128-bit floating-point vector of [2 x double].
4700/// \returns A 128-bit floating-point vector of [4 x float] containing the same
4701/// bitwise pattern as the parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004702static __inline__ __m128 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004703_mm_castpd_ps(__m128d __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004704{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004705 return (__m128)__a;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004706}
4707
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004708/// \brief Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4709/// integer vector.
4710///
4711/// \headerfile <x86intrin.h>
4712///
4713/// This intrinsic has no corresponding instruction.
4714///
4715/// \param __a
4716/// A 128-bit floating-point vector of [2 x double].
4717/// \returns A 128-bit integer vector containing the same bitwise pattern as the
4718/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004719static __inline__ __m128i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004720_mm_castpd_si128(__m128d __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004721{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004722 return (__m128i)__a;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004723}
4724
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004725/// \brief Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4726/// floating-point vector of [2 x double].
4727///
4728/// \headerfile <x86intrin.h>
4729///
4730/// This intrinsic has no corresponding instruction.
4731///
4732/// \param __a
4733/// A 128-bit floating-point vector of [4 x float].
4734/// \returns A 128-bit floating-point vector of [2 x double] containing the same
4735/// bitwise pattern as the parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004736static __inline__ __m128d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004737_mm_castps_pd(__m128 __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004738{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004739 return (__m128d)__a;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004740}
4741
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004742/// \brief Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4743/// integer vector.
4744///
4745/// \headerfile <x86intrin.h>
4746///
4747/// This intrinsic has no corresponding instruction.
4748///
4749/// \param __a
4750/// A 128-bit floating-point vector of [4 x float].
4751/// \returns A 128-bit integer vector containing the same bitwise pattern as the
4752/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004753static __inline__ __m128i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004754_mm_castps_si128(__m128 __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004755{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004756 return (__m128i)__a;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004757}
4758
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004759/// \brief Casts a 128-bit integer vector into a 128-bit floating-point vector
4760/// of [4 x float].
4761///
4762/// \headerfile <x86intrin.h>
4763///
4764/// This intrinsic has no corresponding instruction.
4765///
4766/// \param __a
4767/// A 128-bit integer vector.
4768/// \returns A 128-bit floating-point vector of [4 x float] containing the same
4769/// bitwise pattern as the parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004770static __inline__ __m128 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004771_mm_castsi128_ps(__m128i __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004772{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004773 return (__m128)__a;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004774}
4775
Ekaterina Romanovaa84c24f2016-07-22 23:49:37 +00004776/// \brief Casts a 128-bit integer vector into a 128-bit floating-point vector
4777/// of [2 x double].
4778///
4779/// \headerfile <x86intrin.h>
4780///
4781/// This intrinsic has no corresponding instruction.
4782///
4783/// \param __a
4784/// A 128-bit integer vector.
4785/// \returns A 128-bit floating-point vector of [2 x double] containing the same
4786/// bitwise pattern as the parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00004787static __inline__ __m128d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004788_mm_castsi128_pd(__m128i __a)
Anders Carlsson85eb1242008-12-26 00:45:50 +00004789{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00004790 return (__m128d)__a;
Anders Carlsson85eb1242008-12-26 00:45:50 +00004791}
4792
#if defined(__cplusplus)
extern "C" {
#endif

/// \brief Indicates that a spin loop is being executed for the purposes of
///    optimizing power consumption during the loop.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PAUSE </c> instruction.
///
/// Only declared here (no inline body); the declaration is given C linkage
/// when the header is compiled as C++.
void _mm_pause(void);

#if defined(__cplusplus)
} // extern "C"
#endif
/* The function-attribute shorthand is private to this header; drop it so it
   does not leak into code that includes us. */
#undef __DEFAULT_FN_ATTRS

/* Packs two selector values into one integer: x is placed in bit 1 and y in
   bit 0. Both arguments are fully parenthesized, so arbitrary expressions may
   be passed. NOTE(review): presumably meant as the immediate operand for the
   two-element shuffle intrinsics - confirm against their definitions. */
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
Anders Carlsson37c23712008-12-26 00:49:43 +00004812
Anders Carlssonf15e71d2008-12-24 01:45:22 +00004813#endif /* __EMMINTRIN_H */