/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
#ifndef __MMINTRIN_H
#define __MMINTRIN_H

/* 64-bit vector type taken and returned by the intrinsics in this file:
 * an 8-byte vector whose element type is long long. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Element-typed 8-byte vector types (long long, int, short and char
 * elements). The functions below cast __m64 values to __v2si, __v4hi and
 * __v8qi when passing them to the compiler builtins. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file: always
 * inlined, no debug info, and compiled with the "mmx" target feature. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
Eric Christopher4d1851682015-06-17 07:09:20 +000036
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000037/// \brief Clears the MMX state by setting the state of the x87 stack registers
38/// to empty.
39///
40/// \headerfile <x86intrin.h>
41///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +000042/// This intrinsic corresponds to the <c> EMMS </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000043///
Michael Kupersteine45af542015-06-30 13:36:19 +000044static __inline__ void __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000045_mm_empty(void)
Anders Carlssona5e2e602008-03-03 19:29:06 +000046{
47 __builtin_ia32_emms();
48}
49
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000050/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
51/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
52///
53/// \headerfile <x86intrin.h>
54///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +000055/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000056///
57/// \param __i
58/// A 32-bit integer value.
59/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
60/// parameter. The upper 32 bits are set to 0.
Michael Kupersteine45af542015-06-30 13:36:19 +000061static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000062_mm_cvtsi32_si64(int __i)
Anders Carlssona5e2e602008-03-03 19:29:06 +000063{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +000064 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
Anders Carlssona5e2e602008-03-03 19:29:06 +000065}
66
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000067/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
68/// signed integer.
69///
70/// \headerfile <x86intrin.h>
71///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +000072/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000073///
74/// \param __m
75/// A 64-bit integer vector.
76/// \returns A 32-bit signed integer value containing the lower 32 bits of the
77/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +000078static __inline__ int __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000079_mm_cvtsi64_si32(__m64 __m)
Anders Carlssona5e2e602008-03-03 19:29:06 +000080{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +000081 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
Anders Carlssona5e2e602008-03-03 19:29:06 +000082}
83
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000084/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
85///
86/// \headerfile <x86intrin.h>
87///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +000088/// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000089///
90/// \param __i
91/// A 64-bit signed integer.
92/// \returns A 64-bit integer vector containing the same bitwise pattern as the
93/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +000094static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000095_mm_cvtsi64_m64(long long __i)
Anders Carlssona5e2e602008-03-03 19:29:06 +000096{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +000097 return (__m64)__i;
Anders Carlssona5e2e602008-03-03 19:29:06 +000098}
99
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000100/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
101///
102/// \headerfile <x86intrin.h>
103///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000104/// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000105///
106/// \param __m
107/// A 64-bit integer vector.
108/// \returns A 64-bit signed integer containing the same bitwise pattern as the
109/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +0000110static __inline__ long long __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000111_mm_cvtm64_si64(__m64 __m)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000112{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000113 return (long long)__m;
Anders Carlssona5e2e602008-03-03 19:29:06 +0000114}
115
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000116/// \brief Converts 16-bit signed integers from both 64-bit integer vector
117/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
118/// a 64-bit integer vector of [8 x i8] as the result. Positive values
119/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
120/// are saturated to 0x80.
121///
122/// \headerfile <x86intrin.h>
123///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000124/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000125///
126/// \param __m1
127/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
128/// 16-bit signed integer and is converted to an 8-bit signed integer with
129/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
130/// Negative values less than 0x80 are saturated to 0x80. The converted
131/// [4 x i8] values are written to the lower 32 bits of the result.
132/// \param __m2
133/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
134/// 16-bit signed integer and is converted to an 8-bit signed integer with
135/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
136/// Negative values less than 0x80 are saturated to 0x80. The converted
137/// [4 x i8] values are written to the upper 32 bits of the result.
138/// \returns A 64-bit integer vector of [8 x i8] containing the converted
139/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000140static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000141_mm_packs_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000142{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000143 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000144}
145
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000146/// \brief Converts 32-bit signed integers from both 64-bit integer vector
147/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
148/// a 64-bit integer vector of [4 x i16] as the result. Positive values
149/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
150/// 0x8000 are saturated to 0x8000.
151///
152/// \headerfile <x86intrin.h>
153///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000154/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000155///
156/// \param __m1
157/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
158/// 32-bit signed integer and is converted to a 16-bit signed integer with
159/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
160/// Negative values less than 0x8000 are saturated to 0x8000. The converted
161/// [2 x i16] values are written to the lower 32 bits of the result.
162/// \param __m2
163/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
164/// 32-bit signed integer and is converted to a 16-bit signed integer with
165/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
166/// Negative values less than 0x8000 are saturated to 0x8000. The converted
167/// [2 x i16] values are written to the upper 32 bits of the result.
168/// \returns A 64-bit integer vector of [4 x i16] containing the converted
169/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000170static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000171_mm_packs_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000172{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000173 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000174}
175
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000176/// \brief Converts 16-bit signed integers from both 64-bit integer vector
177/// parameters of [4 x i16] into 8-bit unsigned integer values, and
178/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
179/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
180/// to 0.
181///
182/// \headerfile <x86intrin.h>
183///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000184/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000185///
186/// \param __m1
187/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
188/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
189/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
190/// than 0 are saturated to 0. The converted [4 x i8] values are written to
191/// the lower 32 bits of the result.
192/// \param __m2
193/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
194/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
195/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
196/// than 0 are saturated to 0. The converted [4 x i8] values are written to
197/// the upper 32 bits of the result.
198/// \returns A 64-bit integer vector of [8 x i8] containing the converted
199/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000200static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000201_mm_packs_pu16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000202{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000203 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000204}
205
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000206/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
207/// and interleaves them into a 64-bit integer vector of [8 x i8].
208///
209/// \headerfile <x86intrin.h>
210///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000211/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000212///
213/// \param __m1
Ekaterina Romanova2e041c92017-01-13 01:14:08 +0000214/// A 64-bit integer vector of [8 x i8]. \n
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000215/// Bits [39:32] are written to bits [7:0] of the result. \n
216/// Bits [47:40] are written to bits [23:16] of the result. \n
217/// Bits [55:48] are written to bits [39:32] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000218/// Bits [63:56] are written to bits [55:48] of the result.
219/// \param __m2
220/// A 64-bit integer vector of [8 x i8].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000221/// Bits [39:32] are written to bits [15:8] of the result. \n
222/// Bits [47:40] are written to bits [31:24] of the result. \n
223/// Bits [55:48] are written to bits [47:40] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000224/// Bits [63:56] are written to bits [63:56] of the result.
225/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
226/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000227static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000228_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000229{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000230 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000231}
232
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000233/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
234/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
235///
236/// \headerfile <x86intrin.h>
237///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000238/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000239///
240/// \param __m1
241/// A 64-bit integer vector of [4 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000242/// Bits [47:32] are written to bits [15:0] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000243/// Bits [63:48] are written to bits [47:32] of the result.
244/// \param __m2
245/// A 64-bit integer vector of [4 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000246/// Bits [47:32] are written to bits [31:16] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000247/// Bits [63:48] are written to bits [63:48] of the result.
248/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
249/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000250static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000251_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000252{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000253 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000254}
255
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000256/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
257/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
258///
259/// \headerfile <x86intrin.h>
260///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000261/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000262///
263/// \param __m1
264/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
265/// the lower 32 bits of the result.
266/// \param __m2
267/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
268/// the upper 32 bits of the result.
269/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
270/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000271static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000272_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000273{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000274 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000275}
276
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000277/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
278/// and interleaves them into a 64-bit integer vector of [8 x i8].
279///
280/// \headerfile <x86intrin.h>
281///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000282/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000283///
284/// \param __m1
285/// A 64-bit integer vector of [8 x i8].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000286/// Bits [7:0] are written to bits [7:0] of the result. \n
287/// Bits [15:8] are written to bits [23:16] of the result. \n
288/// Bits [23:16] are written to bits [39:32] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000289/// Bits [31:24] are written to bits [55:48] of the result.
290/// \param __m2
291/// A 64-bit integer vector of [8 x i8].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000292/// Bits [7:0] are written to bits [15:8] of the result. \n
293/// Bits [15:8] are written to bits [31:24] of the result. \n
294/// Bits [23:16] are written to bits [47:40] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000295/// Bits [31:24] are written to bits [63:56] of the result.
296/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
297/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000298static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000299_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000300{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000301 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000302}
303
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000304/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
305/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
306///
307/// \headerfile <x86intrin.h>
308///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000309/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000310///
311/// \param __m1
312/// A 64-bit integer vector of [4 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000313/// Bits [15:0] are written to bits [15:0] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000314/// Bits [31:16] are written to bits [47:32] of the result.
315/// \param __m2
316/// A 64-bit integer vector of [4 x i16].
Ekaterina Romanovadffe45b2016-12-27 00:49:38 +0000317/// Bits [15:0] are written to bits [31:16] of the result. \n
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000318/// Bits [31:16] are written to bits [63:48] of the result.
319/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
320/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000321static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000322_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000323{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000324 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000325}
326
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000327/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
328/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
329///
330/// \headerfile <x86intrin.h>
331///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000332/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000333///
334/// \param __m1
335/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
336/// the lower 32 bits of the result.
337/// \param __m2
338/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
339/// the upper 32 bits of the result.
340/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
341/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000342static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000343_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000344{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000345 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000346}
347
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000348/// \brief Adds each 8-bit integer element of the first 64-bit integer vector
349/// of [8 x i8] to the corresponding 8-bit integer element of the second
350/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
351/// packed into a 64-bit integer vector of [8 x i8].
352///
353/// \headerfile <x86intrin.h>
354///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000355/// This intrinsic corresponds to the <c> PADDB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000356///
357/// \param __m1
358/// A 64-bit integer vector of [8 x i8].
359/// \param __m2
360/// A 64-bit integer vector of [8 x i8].
361/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
362/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000363static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000364_mm_add_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000365{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000366 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000367}
368
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000369/// \brief Adds each 16-bit integer element of the first 64-bit integer vector
370/// of [4 x i16] to the corresponding 16-bit integer element of the second
371/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
372/// packed into a 64-bit integer vector of [4 x i16].
373///
374/// \headerfile <x86intrin.h>
375///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000376/// This intrinsic corresponds to the <c> PADDW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000377///
378/// \param __m1
379/// A 64-bit integer vector of [4 x i16].
380/// \param __m2
381/// A 64-bit integer vector of [4 x i16].
382/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
383/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000384static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000385_mm_add_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000386{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000387 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000388}
389
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000390/// \brief Adds each 32-bit integer element of the first 64-bit integer vector
391/// of [2 x i32] to the corresponding 32-bit integer element of the second
392/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
393/// packed into a 64-bit integer vector of [2 x i32].
394///
395/// \headerfile <x86intrin.h>
396///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000397/// This intrinsic corresponds to the <c> PADDD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000398///
399/// \param __m1
400/// A 64-bit integer vector of [2 x i32].
401/// \param __m2
402/// A 64-bit integer vector of [2 x i32].
403/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
404/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000405static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000406_mm_add_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000407{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000408 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000409}
410
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000411/// \brief Adds each 8-bit signed integer element of the first 64-bit integer
412/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
413/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
414/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
415/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
416///
417/// \headerfile <x86intrin.h>
418///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000419/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000420///
421/// \param __m1
422/// A 64-bit integer vector of [8 x i8].
423/// \param __m2
424/// A 64-bit integer vector of [8 x i8].
425/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
426/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000427static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000428_mm_adds_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000429{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000430 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000431}
432
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000433/// \brief Adds each 16-bit signed integer element of the first 64-bit integer
434/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
435/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
436/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
437/// saturated to 0x8000. The results are packed into a 64-bit integer vector
438/// of [4 x i16].
439///
440/// \headerfile <x86intrin.h>
441///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000442/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000443///
444/// \param __m1
445/// A 64-bit integer vector of [4 x i16].
446/// \param __m2
447/// A 64-bit integer vector of [4 x i16].
448/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
449/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000450static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000451_mm_adds_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000452{
Sean Silvae4c37602015-09-12 02:55:19 +0000453 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000454}
455
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000456/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
457/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
458/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
459/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
460/// [8 x i8].
461///
462/// \headerfile <x86intrin.h>
463///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000464/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000465///
466/// \param __m1
467/// A 64-bit integer vector of [8 x i8].
468/// \param __m2
469/// A 64-bit integer vector of [8 x i8].
470/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
471/// unsigned sums of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000472static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000473_mm_adds_pu8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000474{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000475 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000476}
Sean Silvae4c37602015-09-12 02:55:19 +0000477
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000478/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
479/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
480/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
481/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
482/// integer vector of [4 x i16].
483///
484/// \headerfile <x86intrin.h>
485///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000486/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000487///
488/// \param __m1
489/// A 64-bit integer vector of [4 x i16].
490/// \param __m2
491/// A 64-bit integer vector of [4 x i16].
492/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
493/// unsigned sums of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000494static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000495_mm_adds_pu16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000496{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000497 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000498}
499
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000500/// \brief Subtracts each 8-bit integer element of the second 64-bit integer
501/// vector of [8 x i8] from the corresponding 8-bit integer element of the
502/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
503/// are packed into a 64-bit integer vector of [8 x i8].
504///
505/// \headerfile <x86intrin.h>
506///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000507/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000508///
509/// \param __m1
510/// A 64-bit integer vector of [8 x i8] containing the minuends.
511/// \param __m2
512/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
513/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
514/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000515static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000516_mm_sub_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000517{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000518 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000519}
Sean Silvae4c37602015-09-12 02:55:19 +0000520
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000521/// \brief Subtracts each 16-bit integer element of the second 64-bit integer
522/// vector of [4 x i16] from the corresponding 16-bit integer element of the
523/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
524/// results are packed into a 64-bit integer vector of [4 x i16].
525///
526/// \headerfile <x86intrin.h>
527///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000528/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000529///
530/// \param __m1
531/// A 64-bit integer vector of [4 x i16] containing the minuends.
532/// \param __m2
533/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
534/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
535/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000536static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000537_mm_sub_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000538{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000539 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000540}
Sean Silvae4c37602015-09-12 02:55:19 +0000541
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000542/// \brief Subtracts each 32-bit integer element of the second 64-bit integer
543/// vector of [2 x i32] from the corresponding 32-bit integer element of the
544/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
545/// results are packed into a 64-bit integer vector of [2 x i32].
546///
547/// \headerfile <x86intrin.h>
548///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000549/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000550///
551/// \param __m1
552/// A 64-bit integer vector of [2 x i32] containing the minuends.
553/// \param __m2
554/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
555/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
556/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000557static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000558_mm_sub_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000559{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000560 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000561}
562
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000563/// \brief Subtracts each 8-bit signed integer element of the second 64-bit
564/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
565/// element of the first 64-bit integer vector of [8 x i8]. Positive results
566/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
567/// are saturated to 0x80. The results are packed into a 64-bit integer
568/// vector of [8 x i8].
569///
570/// \headerfile <x86intrin.h>
571///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000572/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000573///
574/// \param __m1
575/// A 64-bit integer vector of [8 x i8] containing the minuends.
576/// \param __m2
577/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
578/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
579/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000580static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000581_mm_subs_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000582{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000583 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000584}
585
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000586/// \brief Subtracts each 16-bit signed integer element of the second 64-bit
587/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
588/// element of the first 64-bit integer vector of [4 x i16]. Positive results
589/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
590/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
591/// integer vector of [4 x i16].
592///
593/// \headerfile <x86intrin.h>
594///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000595/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000596///
597/// \param __m1
598/// A 64-bit integer vector of [4 x i16] containing the minuends.
599/// \param __m2
600/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
601/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
602/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000603static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000604_mm_subs_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000605{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000606 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000607}
608
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000609/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
610/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000611/// element of the first 64-bit integer vector of [8 x i8].
612///
613/// If an element of the first vector is less than the corresponding element
614/// of the second vector, the result is saturated to 0. The results are
615/// packed into a 64-bit integer vector of [8 x i8].
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000616///
617/// \headerfile <x86intrin.h>
618///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000619/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000620///
621/// \param __m1
622/// A 64-bit integer vector of [8 x i8] containing the minuends.
623/// \param __m2
624/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
625/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
626/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000627static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000628_mm_subs_pu8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000629{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000630 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000631}
Sean Silvae4c37602015-09-12 02:55:19 +0000632
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000633/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
634/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000635/// integer element of the first 64-bit integer vector of [4 x i16].
636///
637/// If an element of the first vector is less than the corresponding element
638/// of the second vector, the result is saturated to 0. The results are
639/// packed into a 64-bit integer vector of [4 x i16].
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000640///
641/// \headerfile <x86intrin.h>
642///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000643/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000644///
645/// \param __m1
646/// A 64-bit integer vector of [4 x i16] containing the minuends.
647/// \param __m2
648/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
649/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
650/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000651static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000652_mm_subs_pu16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000653{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000654 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000655}
656
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000657/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
658/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
659/// element of the second 64-bit integer vector of [4 x i16] and get four
660/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
661/// The lower 32 bits of these two sums are packed into a 64-bit integer
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000662/// vector of [2 x i32].
663///
664/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
665/// of both parameters are multiplied, and the sum of both results is written
666/// to bits [31:0] of the result.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000667///
668/// \headerfile <x86intrin.h>
669///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000670/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000671///
672/// \param __m1
673/// A 64-bit integer vector of [4 x i16].
674/// \param __m2
675/// A 64-bit integer vector of [4 x i16].
676/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
677/// products of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000678static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000679_mm_madd_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000680{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000681 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000682}
683
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000684/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
685/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
686/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
687/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
688///
689/// \headerfile <x86intrin.h>
690///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000691/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000692///
693/// \param __m1
694/// A 64-bit integer vector of [4 x i16].
695/// \param __m2
696/// A 64-bit integer vector of [4 x i16].
697/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
698/// of the products of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000699static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000700_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000701{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000702 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000703}
Sean Silvae4c37602015-09-12 02:55:19 +0000704
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000705/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
706/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
707/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
708/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
709///
710/// \headerfile <x86intrin.h>
711///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000712/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000713///
714/// \param __m1
715/// A 64-bit integer vector of [4 x i16].
716/// \param __m2
717/// A 64-bit integer vector of [4 x i16].
718/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
719/// of the products of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000720static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000721_mm_mullo_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000722{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000723 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000724}
725
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000726/// \brief Left-shifts each 16-bit signed integer element of the first
727/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
728/// of bits specified by the second parameter, which is a 64-bit integer. The
729/// lower 16 bits of the results are packed into a 64-bit integer vector of
730/// [4 x i16].
731///
732/// \headerfile <x86intrin.h>
733///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000734/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000735///
736/// \param __m
737/// A 64-bit integer vector of [4 x i16].
738/// \param __count
739/// A 64-bit integer vector interpreted as a single 64-bit integer.
740/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000741/// values. If \a __count is greater or equal to 16, the result is set to all
742/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000743static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000744_mm_sll_pi16(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000745{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000746 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000747}
748
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000749/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
750/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
751/// The lower 16 bits of the results are packed into a 64-bit integer vector
752/// of [4 x i16].
753///
754/// \headerfile <x86intrin.h>
755///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000756/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000757///
758/// \param __m
759/// A 64-bit integer vector of [4 x i16].
760/// \param __count
761/// A 32-bit integer value.
762/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000763/// values. If \a __count is greater or equal to 16, the result is set to all
764/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000765static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000766_mm_slli_pi16(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000767{
Sean Silvae4c37602015-09-12 02:55:19 +0000768 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000769}
770
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000771/// \brief Left-shifts each 32-bit signed integer element of the first
772/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
773/// of bits specified by the second parameter, which is a 64-bit integer. The
774/// lower 32 bits of the results are packed into a 64-bit integer vector of
775/// [2 x i32].
776///
777/// \headerfile <x86intrin.h>
778///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000779/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000780///
781/// \param __m
782/// A 64-bit integer vector of [2 x i32].
783/// \param __count
784/// A 64-bit integer vector interpreted as a single 64-bit integer.
785/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000786/// values. If \a __count is greater or equal to 32, the result is set to all
787/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000788static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000789_mm_sll_pi32(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000790{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000791 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000792}
793
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000794/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
795/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
796/// The lower 32 bits of the results are packed into a 64-bit integer vector
797/// of [2 x i32].
798///
799/// \headerfile <x86intrin.h>
800///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000801/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000802///
803/// \param __m
804/// A 64-bit integer vector of [2 x i32].
805/// \param __count
806/// A 32-bit integer value.
807/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000808/// values. If \a __count is greater or equal to 32, the result is set to all
809/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000810static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000811_mm_slli_pi32(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000812{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000813 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000814}
815
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000816/// \brief Left-shifts the first 64-bit integer parameter by the number of bits
817/// specified by the second 64-bit integer parameter. The lower 64 bits of
818/// result are returned.
819///
820/// \headerfile <x86intrin.h>
821///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000822/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000823///
824/// \param __m
825/// A 64-bit integer vector interpreted as a single 64-bit integer.
826/// \param __count
827/// A 64-bit integer vector interpreted as a single 64-bit integer.
828/// \returns A 64-bit integer vector containing the left-shifted value. If
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000829/// \a __count is greater or equal to 64, the result is set to 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000830static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000831_mm_sll_si64(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000832{
Craig Topper1aa231e2016-05-16 06:38:42 +0000833 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000834}
835
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000836/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
837/// number of bits specified by the second parameter, which is a 32-bit
838/// integer. The lower 64 bits of result are returned.
839///
840/// \headerfile <x86intrin.h>
841///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000842/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000843///
844/// \param __m
845/// A 64-bit integer vector interpreted as a single 64-bit integer.
846/// \param __count
847/// A 32-bit integer value.
848/// \returns A 64-bit integer vector containing the left-shifted value. If
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000849/// \a __count is greater or equal to 64, the result is set to 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000850static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000851_mm_slli_si64(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000852{
Craig Topper1aa231e2016-05-16 06:38:42 +0000853 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000854}
855
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000856/// \brief Right-shifts each 16-bit integer element of the first parameter,
857/// which is a 64-bit integer vector of [4 x i16], by the number of bits
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000858/// specified by the second parameter, which is a 64-bit integer.
859///
860/// High-order bits are filled with the sign bit of the initial value of each
861/// 16-bit element. The 16-bit results are packed into a 64-bit integer
862/// vector of [4 x i16].
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000863///
864/// \headerfile <x86intrin.h>
865///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000866/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000867///
868/// \param __m
869/// A 64-bit integer vector of [4 x i16].
870/// \param __count
871/// A 64-bit integer vector interpreted as a single 64-bit integer.
872/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
873/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000874static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000875_mm_sra_pi16(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000876{
Sean Silvae4c37602015-09-12 02:55:19 +0000877 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000878}
879
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000880/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
881/// of [4 x i16] by the number of bits specified by a 32-bit integer.
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000882///
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000883/// High-order bits are filled with the sign bit of the initial value of each
884/// 16-bit element. The 16-bit results are packed into a 64-bit integer
885/// vector of [4 x i16].
886///
887/// \headerfile <x86intrin.h>
888///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000889/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000890///
891/// \param __m
892/// A 64-bit integer vector of [4 x i16].
893/// \param __count
894/// A 32-bit integer value.
895/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
896/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000897static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000898_mm_srai_pi16(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000899{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000900 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000901}
902
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000903/// \brief Right-shifts each 32-bit integer element of the first parameter,
904/// which is a 64-bit integer vector of [2 x i32], by the number of bits
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000905/// specified by the second parameter, which is a 64-bit integer.
906///
907/// High-order bits are filled with the sign bit of the initial value of each
908/// 32-bit element. The 32-bit results are packed into a 64-bit integer
909/// vector of [2 x i32].
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000910///
911/// \headerfile <x86intrin.h>
912///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000913/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000914///
915/// \param __m
916/// A 64-bit integer vector of [2 x i32].
917/// \param __count
918/// A 64-bit integer vector interpreted as a single 64-bit integer.
919/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
920/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000921static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000922_mm_sra_pi32(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000923{
Sean Silvae4c37602015-09-12 02:55:19 +0000924 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000925}
926
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000927/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
928/// of [2 x i32] by the number of bits specified by a 32-bit integer.
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000929///
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000930/// High-order bits are filled with the sign bit of the initial value of each
931/// 32-bit element. The 32-bit results are packed into a 64-bit integer
932/// vector of [2 x i32].
933///
934/// \headerfile <x86intrin.h>
935///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000936/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000937///
938/// \param __m
939/// A 64-bit integer vector of [2 x i32].
940/// \param __count
941/// A 32-bit integer value.
942/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
943/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000944static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000945_mm_srai_pi32(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000946{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000947 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000948}
949
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000950/// \brief Right-shifts each 16-bit integer element of the first parameter,
951/// which is a 64-bit integer vector of [4 x i16], by the number of bits
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000952/// specified by the second parameter, which is a 64-bit integer.
953///
954/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
955/// integer vector of [4 x i16].
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000956///
957/// \headerfile <x86intrin.h>
958///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000959/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000960///
961/// \param __m
962/// A 64-bit integer vector of [4 x i16].
963/// \param __count
964/// A 64-bit integer vector interpreted as a single 64-bit integer.
965/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
966/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000967static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000968_mm_srl_pi16(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000969{
Sean Silvae4c37602015-09-12 02:55:19 +0000970 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000971}
972
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000973/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
974/// of [4 x i16] by the number of bits specified by a 32-bit integer.
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000975///
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000976/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
977/// integer vector of [4 x i16].
978///
979/// \headerfile <x86intrin.h>
980///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +0000981/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000982///
983/// \param __m
984/// A 64-bit integer vector of [4 x i16].
985/// \param __count
986/// A 32-bit integer value.
987/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
988/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000989static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000990_mm_srli_pi16(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000991{
Sean Silvae4c37602015-09-12 02:55:19 +0000992 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000993}
994
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000995/// \brief Right-shifts each 32-bit integer element of the first parameter,
996/// which is a 64-bit integer vector of [2 x i32], by the number of bits
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +0000997/// specified by the second parameter, which is a 64-bit integer.
998///
999/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1000/// integer vector of [2 x i32].
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001001///
1002/// \headerfile <x86intrin.h>
1003///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001004/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001005///
1006/// \param __m
1007/// A 64-bit integer vector of [2 x i32].
1008/// \param __count
1009/// A 64-bit integer vector interpreted as a single 64-bit integer.
1010/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1011/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001012static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001013_mm_srl_pi32(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001014{
Sean Silvae4c37602015-09-12 02:55:19 +00001015 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001016}
1017
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001018/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
1019/// of [2 x i32] by the number of bits specified by a 32-bit integer.
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001020///
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001021/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1022/// integer vector of [2 x i32].
1023///
1024/// \headerfile <x86intrin.h>
1025///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001026/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001027///
1028/// \param __m
1029/// A 64-bit integer vector of [2 x i32].
1030/// \param __count
1031/// A 32-bit integer value.
1032/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1033/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001034static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001035_mm_srli_pi32(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001036{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +00001037 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001038}
1039
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001040/// \brief Right-shifts the first 64-bit integer parameter by the number of bits
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001041/// specified by the second 64-bit integer parameter.
1042///
1043/// High-order bits are cleared.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001044///
1045/// \headerfile <x86intrin.h>
1046///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001047/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001048///
1049/// \param __m
1050/// A 64-bit integer vector interpreted as a single 64-bit integer.
1051/// \param __count
1052/// A 64-bit integer vector interpreted as a single 64-bit integer.
1053/// \returns A 64-bit integer vector containing the right-shifted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001054static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001055_mm_srl_si64(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001056{
Craig Topper1aa231e2016-05-16 06:38:42 +00001057 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001058}
1059
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001060/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
1061/// number of bits specified by the second parameter, which is a 32-bit
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001062/// integer.
1063///
1064/// High-order bits are cleared.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001065///
1066/// \headerfile <x86intrin.h>
1067///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001068/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001069///
1070/// \param __m
1071/// A 64-bit integer vector interpreted as a single 64-bit integer.
1072/// \param __count
1073/// A 32-bit integer value.
1074/// \returns A 64-bit integer vector containing the right-shifted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001075static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001076_mm_srli_si64(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001077{
Craig Topper1aa231e2016-05-16 06:38:42 +00001078 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001079}
1080
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001081/// \brief Performs a bitwise AND of two 64-bit integer vectors.
1082///
1083/// \headerfile <x86intrin.h>
1084///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001085/// This intrinsic corresponds to the <c> PAND </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001086///
1087/// \param __m1
1088/// A 64-bit integer vector.
1089/// \param __m2
1090/// A 64-bit integer vector.
1091/// \returns A 64-bit integer vector containing the bitwise AND of both
1092/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001093static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001094_mm_and_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001095{
Craig Topper1aa231e2016-05-16 06:38:42 +00001096 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001097}
1098
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001099/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
1100/// performs a bitwise AND of the intermediate result and the second 64-bit
1101/// integer vector.
1102///
1103/// \headerfile <x86intrin.h>
1104///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001105/// This intrinsic corresponds to the <c> PANDN </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001106///
1107/// \param __m1
1108/// A 64-bit integer vector. The one's complement of this parameter is used
1109/// in the bitwise AND.
1110/// \param __m2
1111/// A 64-bit integer vector.
1112/// \returns A 64-bit integer vector containing the bitwise AND of the second
1113/// parameter and the one's complement of the first parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00001114static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001115_mm_andnot_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001116{
Craig Topper1aa231e2016-05-16 06:38:42 +00001117 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001118}
1119
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001120/// \brief Performs a bitwise OR of two 64-bit integer vectors.
1121///
1122/// \headerfile <x86intrin.h>
1123///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001124/// This intrinsic corresponds to the <c> POR </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001125///
1126/// \param __m1
1127/// A 64-bit integer vector.
1128/// \param __m2
1129/// A 64-bit integer vector.
1130/// \returns A 64-bit integer vector containing the bitwise OR of both
1131/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001132static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001133_mm_or_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001134{
Craig Topper1aa231e2016-05-16 06:38:42 +00001135 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001136}
1137
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001138/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
1139///
1140/// \headerfile <x86intrin.h>
1141///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001142/// This intrinsic corresponds to the <c> PXOR </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001143///
1144/// \param __m1
1145/// A 64-bit integer vector.
1146/// \param __m2
1147/// A 64-bit integer vector.
1148/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1149/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001150static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001151_mm_xor_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001152{
Craig Topper1aa231e2016-05-16 06:38:42 +00001153 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001154}
1155
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001156/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1157/// [8 x i8] to determine if the element of the first vector is equal to the
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001158/// corresponding element of the second vector.
1159///
1160/// The comparison yields 0 for false, 0xFF for true.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001161///
1162/// \headerfile <x86intrin.h>
1163///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001164/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001165///
1166/// \param __m1
1167/// A 64-bit integer vector of [8 x i8].
1168/// \param __m2
1169/// A 64-bit integer vector of [8 x i8].
1170/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1171/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001172static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001173_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001174{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001175 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001176}
1177
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001178/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1179/// [4 x i16] to determine if the element of the first vector is equal to the
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001180/// corresponding element of the second vector.
1181///
1182/// The comparison yields 0 for false, 0xFFFF for true.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001183///
1184/// \headerfile <x86intrin.h>
1185///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001186/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001187///
1188/// \param __m1
1189/// A 64-bit integer vector of [4 x i16].
1190/// \param __m2
1191/// A 64-bit integer vector of [4 x i16].
1192/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1193/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001194static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001195_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001196{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001197 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001198}
1199
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001200/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1201/// [2 x i32] to determine if the element of the first vector is equal to the
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001202/// corresponding element of the second vector.
1203///
1204/// The comparison yields 0 for false, 0xFFFFFFFF for true.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001205///
1206/// \headerfile <x86intrin.h>
1207///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001208/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001209///
1210/// \param __m1
1211/// A 64-bit integer vector of [2 x i32].
1212/// \param __m2
1213/// A 64-bit integer vector of [2 x i32].
1214/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1215/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001216static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001217_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001218{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001219 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001220}
1221
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001222/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1223/// [8 x i8] to determine if the element of the first vector is greater than
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001224/// the corresponding element of the second vector.
1225///
1226/// The comparison yields 0 for false, 0xFF for true.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001227///
1228/// \headerfile <x86intrin.h>
1229///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001230/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001231///
1232/// \param __m1
1233/// A 64-bit integer vector of [8 x i8].
1234/// \param __m2
1235/// A 64-bit integer vector of [8 x i8].
1236/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1237/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001238static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001239_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001240{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001241 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001242}
1243
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001244/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1245/// [4 x i16] to determine if the element of the first vector is greater than
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001246/// the corresponding element of the second vector.
1247///
1248/// The comparison yields 0 for false, 0xFFFF for true.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001249///
1250/// \headerfile <x86intrin.h>
1251///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001252/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001253///
1254/// \param __m1
1255/// A 64-bit integer vector of [4 x i16].
1256/// \param __m2
1257/// A 64-bit integer vector of [4 x i16].
1258/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1259/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001260static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001261_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001262{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001263 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001264}
1265
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001266/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1267/// [2 x i32] to determine if the element of the first vector is greater than
Ekaterina Romanova1d4a0f22017-05-15 03:25:04 +00001268/// the corresponding element of the second vector.
1269///
1270/// The comparison yields 0 for false, 0xFFFFFFFF for true.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001271///
1272/// \headerfile <x86intrin.h>
1273///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001274/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001275///
1276/// \param __m1
1277/// A 64-bit integer vector of [2 x i32].
1278/// \param __m2
1279/// A 64-bit integer vector of [2 x i32].
1280/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1281/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001282static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001283_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001284{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001285 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001286}
1287
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001288/// \brief Constructs a 64-bit integer vector initialized to zero.
1289///
1290/// \headerfile <x86intrin.h>
1291///
Ekaterina Romanova03ecd772017-07-12 20:18:55 +00001292/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001293///
1294/// \returns An initialized 64-bit integer vector with all elements set to zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00001295static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001296_mm_setzero_si64(void)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001297{
1298 return (__m64){ 0LL };
1299}
1300
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001301/// \brief Constructs a 64-bit integer vector initialized with the specified
1302/// 32-bit integer values.
1303///
1304/// \headerfile <x86intrin.h>
1305///
1306/// This intrinsic is a utility function and does not correspond to a specific
1307/// instruction.
1308///
1309/// \param __i1
1310/// A 32-bit integer value used to initialize the upper 32 bits of the
1311/// result.
1312/// \param __i0
1313/// A 32-bit integer value used to initialize the lower 32 bits of the
1314/// result.
1315/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001316static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001317_mm_set_pi32(int __i1, int __i0)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001318{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001319 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001320}
1321
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001322/// \brief Constructs a 64-bit integer vector initialized with the specified
1323/// 16-bit integer values.
1324///
1325/// \headerfile <x86intrin.h>
1326///
1327/// This intrinsic is a utility function and does not correspond to a specific
1328/// instruction.
1329///
1330/// \param __s3
1331/// A 16-bit integer value used to initialize bits [63:48] of the result.
1332/// \param __s2
1333/// A 16-bit integer value used to initialize bits [47:32] of the result.
1334/// \param __s1
1335/// A 16-bit integer value used to initialize bits [31:16] of the result.
1336/// \param __s0
1337/// A 16-bit integer value used to initialize bits [15:0] of the result.
1338/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001339static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001340_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001341{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001342 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001343}
1344
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001345/// \brief Constructs a 64-bit integer vector initialized with the specified
1346/// 8-bit integer values.
1347///
1348/// \headerfile <x86intrin.h>
1349///
1350/// This intrinsic is a utility function and does not correspond to a specific
1351/// instruction.
1352///
1353/// \param __b7
1354/// An 8-bit integer value used to initialize bits [63:56] of the result.
1355/// \param __b6
1356/// An 8-bit integer value used to initialize bits [55:48] of the result.
1357/// \param __b5
1358/// An 8-bit integer value used to initialize bits [47:40] of the result.
1359/// \param __b4
1360/// An 8-bit integer value used to initialize bits [39:32] of the result.
1361/// \param __b3
1362/// An 8-bit integer value used to initialize bits [31:24] of the result.
1363/// \param __b2
1364/// An 8-bit integer value used to initialize bits [23:16] of the result.
1365/// \param __b1
1366/// An 8-bit integer value used to initialize bits [15:8] of the result.
1367/// \param __b0
1368/// An 8-bit integer value used to initialize bits [7:0] of the result.
1369/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001370static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001371_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1372 char __b1, char __b0)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001373{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001374 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1375 __b4, __b5, __b6, __b7);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001376}
1377
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001378/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
1379/// 32-bit integer vector elements set to the specified 32-bit integer
1380/// value.
1381///
1382/// \headerfile <x86intrin.h>
1383///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001384/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001385///
1386/// \param __i
1387/// A 32-bit integer value used to initialize each vector element of the
1388/// result.
1389/// \returns An initialized 64-bit integer vector of [2 x i32].
Michael Kupersteine45af542015-06-30 13:36:19 +00001390static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001391_mm_set1_pi32(int __i)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001392{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001393 return _mm_set_pi32(__i, __i);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001394}
1395
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001396/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
1397/// 16-bit integer vector elements set to the specified 16-bit integer
1398/// value.
1399///
1400/// \headerfile <x86intrin.h>
1401///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001402/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001403///
1404/// \param __w
1405/// A 16-bit integer value used to initialize each vector element of the
1406/// result.
1407/// \returns An initialized 64-bit integer vector of [4 x i16].
Michael Kupersteine45af542015-06-30 13:36:19 +00001408static __inline__ __m64 __DEFAULT_FN_ATTRS
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001409_mm_set1_pi16(short __w)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001410{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001411 return _mm_set_pi16(__w, __w, __w, __w);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001412}
1413
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001414/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
1415/// 8-bit integer vector elements set to the specified 8-bit integer value.
1416///
1417/// \headerfile <x86intrin.h>
1418///
Ekaterina Romanova0c1c3bb2016-12-09 18:35:50 +00001419/// This intrinsic corresponds to the <c> VPUNPCKLBW + VPSHUFLW / PUNPCKLBW +
1420/// PSHUFLW </c> instruction.
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001421///
1422/// \param __b
1423/// An 8-bit integer value used to initialize each vector element of the
1424/// result.
1425/// \returns An initialized 64-bit integer vector of [8 x i8].
Michael Kupersteine45af542015-06-30 13:36:19 +00001426static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001427_mm_set1_pi8(char __b)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001428{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001429 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001430}
1431
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001432/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1433/// the specified 32-bit integer values.
1434///
1435/// \headerfile <x86intrin.h>
1436///
1437/// This intrinsic is a utility function and does not correspond to a specific
1438/// instruction.
1439///
1440/// \param __i0
1441/// A 32-bit integer value used to initialize the lower 32 bits of the
1442/// result.
1443/// \param __i1
1444/// A 32-bit integer value used to initialize the upper 32 bits of the
1445/// result.
1446/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001447static __inline__ __m64 __DEFAULT_FN_ATTRS
Eli Friedmancb59baa2011-05-05 20:21:54 +00001448_mm_setr_pi32(int __i0, int __i1)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001449{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001450 return _mm_set_pi32(__i1, __i0);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001451}
1452
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001453/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1454/// the specified 16-bit integer values.
1455///
1456/// \headerfile <x86intrin.h>
1457///
1458/// This intrinsic is a utility function and does not correspond to a specific
1459/// instruction.
1460///
1461/// \param __w0
1462/// A 16-bit integer value used to initialize bits [15:0] of the result.
1463/// \param __w1
1464/// A 16-bit integer value used to initialize bits [31:16] of the result.
1465/// \param __w2
1466/// A 16-bit integer value used to initialize bits [47:32] of the result.
1467/// \param __w3
1468/// A 16-bit integer value used to initialize bits [63:48] of the result.
1469/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001470static __inline__ __m64 __DEFAULT_FN_ATTRS
Eli Friedmancb59baa2011-05-05 20:21:54 +00001471_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001472{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001473 return _mm_set_pi16(__w3, __w2, __w1, __w0);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001474}
1475
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001476/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1477/// the specified 8-bit integer values.
1478///
1479/// \headerfile <x86intrin.h>
1480///
1481/// This intrinsic is a utility function and does not correspond to a specific
1482/// instruction.
1483///
1484/// \param __b0
1485/// An 8-bit integer value used to initialize bits [7:0] of the result.
1486/// \param __b1
1487/// An 8-bit integer value used to initialize bits [15:8] of the result.
1488/// \param __b2
1489/// An 8-bit integer value used to initialize bits [23:16] of the result.
1490/// \param __b3
1491/// An 8-bit integer value used to initialize bits [31:24] of the result.
1492/// \param __b4
1493/// An 8-bit integer value used to initialize bits [39:32] of the result.
1494/// \param __b5
1495/// An 8-bit integer value used to initialize bits [47:40] of the result.
1496/// \param __b6
1497/// An 8-bit integer value used to initialize bits [55:48] of the result.
1498/// \param __b7
1499/// An 8-bit integer value used to initialize bits [63:56] of the result.
1500/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001501static __inline__ __m64 __DEFAULT_FN_ATTRS
Eli Friedmancb59baa2011-05-05 20:21:54 +00001502_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1503 char __b6, char __b7)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001504{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001505 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001506}
1507
/* Stop exposing the function-attribute shorthand used by the definitions
   above. */
#undef __DEFAULT_FN_ATTRS

/* Aliases for compatibility: each _m_* name expands to the _mm_* intrinsic
   listed next to it. */

/* empty / integer conversions */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int _mm_cvtsi64_si32
#define _m_to_int64 _mm_cvtm64_si64

/* pack / unpack */
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32

/* add */
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16

/* subtract */
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16

/* multiply */
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16

/* shift */
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64

/* bitwise logic */
#define _m_pand _mm_and_si64
#define _m_pandn _mm_andnot_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64

/* compare */
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
1568
Anders Carlssona5e2e602008-03-03 19:29:06 +00001569#endif /* __MMINTRIN_H */
1570