blob: f239dc6b691f89ae406ca02f143cc65c014d657f [file] [log] [blame]
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __MMINTRIN_H
25#define __MMINTRIN_H
26
/* Public 64-bit MMX vector type. The element layout is selected by the cast
 * applied at each use site. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Internal element-typed 64-bit vector types; the intrinsics in this file
 * cast their __m64 operands to these before calling a builtin. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
Eric Christopher4d1851682015-06-17 07:09:20 +000036
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000037/// \brief Clears the MMX state by setting the state of the x87 stack registers
38/// to empty.
39///
40/// \headerfile <x86intrin.h>
41///
42/// This intrinsic corresponds to the \c EMMS instruction.
43///
Michael Kupersteine45af542015-06-30 13:36:19 +000044static __inline__ void __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000045_mm_empty(void)
Anders Carlssona5e2e602008-03-03 19:29:06 +000046{
47 __builtin_ia32_emms();
48}
49
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000050/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
51/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
52///
53/// \headerfile <x86intrin.h>
54///
55/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
56///
57/// \param __i
58/// A 32-bit integer value.
59/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
60/// parameter. The upper 32 bits are set to 0.
Michael Kupersteine45af542015-06-30 13:36:19 +000061static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000062_mm_cvtsi32_si64(int __i)
Anders Carlssona5e2e602008-03-03 19:29:06 +000063{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +000064 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
Anders Carlssona5e2e602008-03-03 19:29:06 +000065}
66
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000067/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
68/// signed integer.
69///
70/// \headerfile <x86intrin.h>
71///
72/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
73///
74/// \param __m
75/// A 64-bit integer vector.
76/// \returns A 32-bit signed integer value containing the lower 32 bits of the
77/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +000078static __inline__ int __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000079_mm_cvtsi64_si32(__m64 __m)
Anders Carlssona5e2e602008-03-03 19:29:06 +000080{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +000081 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
Anders Carlssona5e2e602008-03-03 19:29:06 +000082}
83
Ekaterina Romanova71a68c92016-06-10 00:10:40 +000084/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
85///
86/// \headerfile <x86intrin.h>
87///
88/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
89///
90/// \param __i
91/// A 64-bit signed integer.
92/// \returns A 64-bit integer vector containing the same bitwise pattern as the
93/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +000094static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +000095_mm_cvtsi64_m64(long long __i)
Anders Carlssona5e2e602008-03-03 19:29:06 +000096{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +000097 return (__m64)__i;
Anders Carlssona5e2e602008-03-03 19:29:06 +000098}
99
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000100/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
101///
102/// \headerfile <x86intrin.h>
103///
104/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
105///
106/// \param __m
107/// A 64-bit integer vector.
108/// \returns A 64-bit signed integer containing the same bitwise pattern as the
109/// parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +0000110static __inline__ long long __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000111_mm_cvtm64_si64(__m64 __m)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000112{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000113 return (long long)__m;
Anders Carlssona5e2e602008-03-03 19:29:06 +0000114}
115
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000116/// \brief Converts 16-bit signed integers from both 64-bit integer vector
117/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
118/// a 64-bit integer vector of [8 x i8] as the result. Positive values
119/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
120/// are saturated to 0x80.
121///
122/// \headerfile <x86intrin.h>
123///
124/// This intrinsic corresponds to the \c PACKSSWB instruction.
125///
126/// \param __m1
127/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
128/// 16-bit signed integer and is converted to an 8-bit signed integer with
129/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
130/// Negative values less than 0x80 are saturated to 0x80. The converted
131/// [4 x i8] values are written to the lower 32 bits of the result.
132/// \param __m2
133/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
134/// 16-bit signed integer and is converted to an 8-bit signed integer with
135/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
136/// Negative values less than 0x80 are saturated to 0x80. The converted
137/// [4 x i8] values are written to the upper 32 bits of the result.
138/// \returns A 64-bit integer vector of [8 x i8] containing the converted
139/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000140static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000141_mm_packs_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000142{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000143 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000144}
145
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000146/// \brief Converts 32-bit signed integers from both 64-bit integer vector
147/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
148/// a 64-bit integer vector of [4 x i16] as the result. Positive values
149/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
150/// 0x8000 are saturated to 0x8000.
151///
152/// \headerfile <x86intrin.h>
153///
154/// This intrinsic corresponds to the \c PACKSSDW instruction.
155///
156/// \param __m1
157/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
158/// 32-bit signed integer and is converted to a 16-bit signed integer with
159/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
160/// Negative values less than 0x8000 are saturated to 0x8000. The converted
161/// [2 x i16] values are written to the lower 32 bits of the result.
162/// \param __m2
163/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
164/// 32-bit signed integer and is converted to a 16-bit signed integer with
165/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
166/// Negative values less than 0x8000 are saturated to 0x8000. The converted
167/// [2 x i16] values are written to the upper 32 bits of the result.
168/// \returns A 64-bit integer vector of [4 x i16] containing the converted
169/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000170static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000171_mm_packs_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000172{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000173 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000174}
175
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000176/// \brief Converts 16-bit signed integers from both 64-bit integer vector
177/// parameters of [4 x i16] into 8-bit unsigned integer values, and
178/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
179/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
180/// to 0.
181///
182/// \headerfile <x86intrin.h>
183///
184/// This intrinsic corresponds to the \c PACKUSWB instruction.
185///
186/// \param __m1
187/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
188/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
189/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
190/// than 0 are saturated to 0. The converted [4 x i8] values are written to
191/// the lower 32 bits of the result.
192/// \param __m2
193/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
194/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
195/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
196/// than 0 are saturated to 0. The converted [4 x i8] values are written to
197/// the upper 32 bits of the result.
198/// \returns A 64-bit integer vector of [8 x i8] containing the converted
199/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000200static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000201_mm_packs_pu16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000202{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000203 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000204}
205
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000206/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
207/// and interleaves them into a 64-bit integer vector of [8 x i8].
208///
209/// \headerfile <x86intrin.h>
210///
211/// This intrinsic corresponds to the \c PUNPCKHBW instruction.
212///
213/// \param __m1
214/// A 64-bit integer vector of [8 x i8].
215/// Bits [39:32] are written to bits [7:0] of the result.
216/// Bits [47:40] are written to bits [23:16] of the result.
217/// Bits [55:48] are written to bits [39:32] of the result.
218/// Bits [63:56] are written to bits [55:48] of the result.
219/// \param __m2
220/// A 64-bit integer vector of [8 x i8].
221/// Bits [39:32] are written to bits [15:8] of the result.
222/// Bits [47:40] are written to bits [31:24] of the result.
223/// Bits [55:48] are written to bits [47:40] of the result.
224/// Bits [63:56] are written to bits [63:56] of the result.
225/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
226/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000227static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000228_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000229{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000230 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000231}
232
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000233/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
234/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
235///
236/// \headerfile <x86intrin.h>
237///
238/// This intrinsic corresponds to the \c PUNPCKHWD instruction.
239///
240/// \param __m1
241/// A 64-bit integer vector of [4 x i16].
242/// Bits [47:32] are written to bits [15:0] of the result.
243/// Bits [63:48] are written to bits [47:32] of the result.
244/// \param __m2
245/// A 64-bit integer vector of [4 x i16].
246/// Bits [47:32] are written to bits [31:16] of the result.
247/// Bits [63:48] are written to bits [63:48] of the result.
248/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
249/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000250static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000251_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000252{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000253 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000254}
255
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000256/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
257/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
258///
259/// \headerfile <x86intrin.h>
260///
261/// This intrinsic corresponds to the \c PUNPCKHDQ instruction.
262///
263/// \param __m1
264/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
265/// the lower 32 bits of the result.
266/// \param __m2
267/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
268/// the upper 32 bits of the result.
269/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
270/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000271static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000272_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000273{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000274 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000275}
276
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000277/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
278/// and interleaves them into a 64-bit integer vector of [8 x i8].
279///
280/// \headerfile <x86intrin.h>
281///
282/// This intrinsic corresponds to the \c PUNPCKLBW instruction.
283///
284/// \param __m1
285/// A 64-bit integer vector of [8 x i8].
286/// Bits [7:0] are written to bits [7:0] of the result.
287/// Bits [15:8] are written to bits [23:16] of the result.
288/// Bits [23:16] are written to bits [39:32] of the result.
289/// Bits [31:24] are written to bits [55:48] of the result.
290/// \param __m2
291/// A 64-bit integer vector of [8 x i8].
292/// Bits [7:0] are written to bits [15:8] of the result.
293/// Bits [15:8] are written to bits [31:24] of the result.
294/// Bits [23:16] are written to bits [47:40] of the result.
295/// Bits [31:24] are written to bits [63:56] of the result.
296/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
297/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000298static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000299_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000300{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000301 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000302}
303
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000304/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
305/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
306///
307/// \headerfile <x86intrin.h>
308///
309/// This intrinsic corresponds to the \c PUNPCKLWD instruction.
310///
311/// \param __m1
312/// A 64-bit integer vector of [4 x i16].
313/// Bits [15:0] are written to bits [15:0] of the result.
314/// Bits [31:16] are written to bits [47:32] of the result.
315/// \param __m2
316/// A 64-bit integer vector of [4 x i16].
317/// Bits [15:0] are written to bits [31:16] of the result.
318/// Bits [31:16] are written to bits [63:48] of the result.
319/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
320/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000321static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000322_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000323{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000324 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000325}
326
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000327/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
328/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
329///
330/// \headerfile <x86intrin.h>
331///
332/// This intrinsic corresponds to the \c PUNPCKLDQ instruction.
333///
334/// \param __m1
335/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
336/// the lower 32 bits of the result.
337/// \param __m2
338/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
339/// the upper 32 bits of the result.
340/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
341/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000342static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000343_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000344{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000345 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000346}
347
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000348/// \brief Adds each 8-bit integer element of the first 64-bit integer vector
349/// of [8 x i8] to the corresponding 8-bit integer element of the second
350/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
351/// packed into a 64-bit integer vector of [8 x i8].
352///
353/// \headerfile <x86intrin.h>
354///
355/// This intrinsic corresponds to the \c PADDB instruction.
356///
357/// \param __m1
358/// A 64-bit integer vector of [8 x i8].
359/// \param __m2
360/// A 64-bit integer vector of [8 x i8].
361/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
362/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000363static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000364_mm_add_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000365{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000366 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000367}
368
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000369/// \brief Adds each 16-bit integer element of the first 64-bit integer vector
370/// of [4 x i16] to the corresponding 16-bit integer element of the second
371/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
372/// packed into a 64-bit integer vector of [4 x i16].
373///
374/// \headerfile <x86intrin.h>
375///
376/// This intrinsic corresponds to the \c PADDW instruction.
377///
378/// \param __m1
379/// A 64-bit integer vector of [4 x i16].
380/// \param __m2
381/// A 64-bit integer vector of [4 x i16].
382/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
383/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000384static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000385_mm_add_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000386{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000387 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000388}
389
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000390/// \brief Adds each 32-bit integer element of the first 64-bit integer vector
391/// of [2 x i32] to the corresponding 32-bit integer element of the second
392/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
393/// packed into a 64-bit integer vector of [2 x i32].
394///
395/// \headerfile <x86intrin.h>
396///
397/// This intrinsic corresponds to the \c PADDD instruction.
398///
399/// \param __m1
400/// A 64-bit integer vector of [2 x i32].
401/// \param __m2
402/// A 64-bit integer vector of [2 x i32].
403/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
404/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000405static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000406_mm_add_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000407{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000408 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000409}
410
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000411/// \brief Adds each 8-bit signed integer element of the first 64-bit integer
412/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
413/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
414/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
415/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
416///
417/// \headerfile <x86intrin.h>
418///
419/// This intrinsic corresponds to the \c PADDSB instruction.
420///
421/// \param __m1
422/// A 64-bit integer vector of [8 x i8].
423/// \param __m2
424/// A 64-bit integer vector of [8 x i8].
425/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
426/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000427static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000428_mm_adds_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000429{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000430 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000431}
432
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000433/// \brief Adds each 16-bit signed integer element of the first 64-bit integer
434/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
435/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
436/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
437/// saturated to 0x8000. The results are packed into a 64-bit integer vector
438/// of [4 x i16].
439///
440/// \headerfile <x86intrin.h>
441///
442/// This intrinsic corresponds to the \c PADDSW instruction.
443///
444/// \param __m1
445/// A 64-bit integer vector of [4 x i16].
446/// \param __m2
447/// A 64-bit integer vector of [4 x i16].
448/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
449/// of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000450static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000451_mm_adds_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000452{
Sean Silvae4c37602015-09-12 02:55:19 +0000453 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000454}
455
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000456/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
457/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
458/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
459/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
460/// [8 x i8].
461///
462/// \headerfile <x86intrin.h>
463///
464/// This intrinsic corresponds to the \c PADDUSB instruction.
465///
466/// \param __m1
467/// A 64-bit integer vector of [8 x i8].
468/// \param __m2
469/// A 64-bit integer vector of [8 x i8].
470/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
471/// unsigned sums of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000472static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000473_mm_adds_pu8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000474{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000475 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000476}
Sean Silvae4c37602015-09-12 02:55:19 +0000477
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000478/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
479/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
480/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
481/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
482/// integer vector of [4 x i16].
483///
484/// \headerfile <x86intrin.h>
485///
486/// This intrinsic corresponds to the \c PADDUSW instruction.
487///
488/// \param __m1
489/// A 64-bit integer vector of [4 x i16].
490/// \param __m2
491/// A 64-bit integer vector of [4 x i16].
492/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
493/// unsigned sums of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000494static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000495_mm_adds_pu16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000496{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000497 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000498}
499
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000500/// \brief Subtracts each 8-bit integer element of the second 64-bit integer
501/// vector of [8 x i8] from the corresponding 8-bit integer element of the
502/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
503/// are packed into a 64-bit integer vector of [8 x i8].
504///
505/// \headerfile <x86intrin.h>
506///
507/// This intrinsic corresponds to the \c PSUBB instruction.
508///
509/// \param __m1
510/// A 64-bit integer vector of [8 x i8] containing the minuends.
511/// \param __m2
512/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
513/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
514/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000515static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000516_mm_sub_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000517{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000518 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000519}
Sean Silvae4c37602015-09-12 02:55:19 +0000520
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000521/// \brief Subtracts each 16-bit integer element of the second 64-bit integer
522/// vector of [4 x i16] from the corresponding 16-bit integer element of the
523/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
524/// results are packed into a 64-bit integer vector of [4 x i16].
525///
526/// \headerfile <x86intrin.h>
527///
528/// This intrinsic corresponds to the \c PSUBW instruction.
529///
530/// \param __m1
531/// A 64-bit integer vector of [4 x i16] containing the minuends.
532/// \param __m2
533/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
534/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
535/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000536static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000537_mm_sub_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000538{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000539 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000540}
Sean Silvae4c37602015-09-12 02:55:19 +0000541
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000542/// \brief Subtracts each 32-bit integer element of the second 64-bit integer
543/// vector of [2 x i32] from the corresponding 32-bit integer element of the
544/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
545/// results are packed into a 64-bit integer vector of [2 x i32].
546///
547/// \headerfile <x86intrin.h>
548///
549/// This intrinsic corresponds to the \c PSUBD instruction.
550///
551/// \param __m1
552/// A 64-bit integer vector of [2 x i32] containing the minuends.
553/// \param __m2
554/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
555/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
556/// both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000557static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000558_mm_sub_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000559{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000560 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000561}
562
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000563/// \brief Subtracts each 8-bit signed integer element of the second 64-bit
564/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
565/// element of the first 64-bit integer vector of [8 x i8]. Positive results
566/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
567/// are saturated to 0x80. The results are packed into a 64-bit integer
568/// vector of [8 x i8].
569///
570/// \headerfile <x86intrin.h>
571///
572/// This intrinsic corresponds to the \c PSUBSB instruction.
573///
574/// \param __m1
575/// A 64-bit integer vector of [8 x i8] containing the minuends.
576/// \param __m2
577/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
578/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
579/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000580static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000581_mm_subs_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000582{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000583 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000584}
585
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000586/// \brief Subtracts each 16-bit signed integer element of the second 64-bit
587/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
588/// element of the first 64-bit integer vector of [4 x i16]. Positive results
589/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
590/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
591/// integer vector of [4 x i16].
592///
593/// \headerfile <x86intrin.h>
594///
595/// This intrinsic corresponds to the \c PSUBSW instruction.
596///
597/// \param __m1
598/// A 64-bit integer vector of [4 x i16] containing the minuends.
599/// \param __m2
600/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
601/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
602/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000603static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000604_mm_subs_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000605{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000606 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000607}
608
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000609/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
610/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
611/// element of the first 64-bit integer vector of [8 x i8]. If an element of
612/// the first vector is less than the corresponding element of the second
613/// vector, the result is saturated to 0. The results are packed into a
614/// 64-bit integer vector of [8 x i8].
615///
616/// \headerfile <x86intrin.h>
617///
618/// This intrinsic corresponds to the \c PSUBUSB instruction.
619///
620/// \param __m1
621/// A 64-bit integer vector of [8 x i8] containing the minuends.
622/// \param __m2
623/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
624/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
625/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000626static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000627_mm_subs_pu8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000628{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000629 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000630}
Sean Silvae4c37602015-09-12 02:55:19 +0000631
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000632/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
633/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
634/// integer element of the first 64-bit integer vector of [4 x i16]. If an
635/// element of the first vector is less than the corresponding element of the
636/// second vector, the result is saturated to 0. The results are packed into
637/// a 64-bit integer vector of [4 x i16].
638///
639/// \headerfile <x86intrin.h>
640///
641/// This intrinsic corresponds to the \c PSUBUSW instruction.
642///
643/// \param __m1
644/// A 64-bit integer vector of [4 x i16] containing the minuends.
645/// \param __m2
646/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
647/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
648/// differences of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000649static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000650_mm_subs_pu16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000651{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000652 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000653}
654
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000655/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
656/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
657/// element of the second 64-bit integer vector of [4 x i16] and get four
658/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
659/// The lower 32 bits of these two sums are packed into a 64-bit integer
660/// vector of [2 x i32]. For example, bits [15:0] of both parameters are
661/// multiplied, bits [31:16] of both parameters are multiplied, and the sum
662/// of both results is written to bits [31:0] of the result.
663///
664/// \headerfile <x86intrin.h>
665///
666/// This intrinsic corresponds to the \c PMADDWD instruction.
667///
668/// \param __m1
669/// A 64-bit integer vector of [4 x i16].
670/// \param __m2
671/// A 64-bit integer vector of [4 x i16].
672/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
673/// products of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000674static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000675_mm_madd_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000676{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000677 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000678}
679
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000680/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
681/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
682/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
683/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
684///
685/// \headerfile <x86intrin.h>
686///
687/// This intrinsic corresponds to the \c PMULHW instruction.
688///
689/// \param __m1
690/// A 64-bit integer vector of [4 x i16].
691/// \param __m2
692/// A 64-bit integer vector of [4 x i16].
693/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
694/// of the products of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000695static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000696_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000697{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000698 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000699}
Sean Silvae4c37602015-09-12 02:55:19 +0000700
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000701/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
702/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
703/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
704/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
705///
706/// \headerfile <x86intrin.h>
707///
708/// This intrinsic corresponds to the \c PMULLW instruction.
709///
710/// \param __m1
711/// A 64-bit integer vector of [4 x i16].
712/// \param __m2
713/// A 64-bit integer vector of [4 x i16].
714/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
715/// of the products of both parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +0000716static __inline__ __m64 __DEFAULT_FN_ATTRS
Sean Silvae4c37602015-09-12 02:55:19 +0000717_mm_mullo_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000718{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +0000719 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000720}
721
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000722/// \brief Left-shifts each 16-bit signed integer element of the first
723/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
724/// of bits specified by the second parameter, which is a 64-bit integer. The
725/// lower 16 bits of the results are packed into a 64-bit integer vector of
726/// [4 x i16].
727///
728/// \headerfile <x86intrin.h>
729///
730/// This intrinsic corresponds to the \c PSLLW instruction.
731///
732/// \param __m
733/// A 64-bit integer vector of [4 x i16].
734/// \param __count
735/// A 64-bit integer vector interpreted as a single 64-bit integer.
736/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000737/// values. If \a __count is greater or equal to 16, the result is set to all
738/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000739static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000740_mm_sll_pi16(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000741{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000742 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000743}
744
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000745/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
746/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
747/// The lower 16 bits of the results are packed into a 64-bit integer vector
748/// of [4 x i16].
749///
750/// \headerfile <x86intrin.h>
751///
752/// This intrinsic corresponds to the \c PSLLW instruction.
753///
754/// \param __m
755/// A 64-bit integer vector of [4 x i16].
756/// \param __count
757/// A 32-bit integer value.
758/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000759/// values. If \a __count is greater or equal to 16, the result is set to all
760/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000761static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000762_mm_slli_pi16(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000763{
Sean Silvae4c37602015-09-12 02:55:19 +0000764 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000765}
766
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000767/// \brief Left-shifts each 32-bit signed integer element of the first
768/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
769/// of bits specified by the second parameter, which is a 64-bit integer. The
770/// lower 32 bits of the results are packed into a 64-bit integer vector of
771/// [2 x i32].
772///
773/// \headerfile <x86intrin.h>
774///
775/// This intrinsic corresponds to the \c PSLLD instruction.
776///
777/// \param __m
778/// A 64-bit integer vector of [2 x i32].
779/// \param __count
780/// A 64-bit integer vector interpreted as a single 64-bit integer.
781/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000782/// values. If \a __count is greater or equal to 32, the result is set to all
783/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000784static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000785_mm_sll_pi32(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000786{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000787 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000788}
789
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000790/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
791/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
792/// The lower 32 bits of the results are packed into a 64-bit integer vector
793/// of [2 x i32].
794///
795/// \headerfile <x86intrin.h>
796///
797/// This intrinsic corresponds to the \c PSLLD instruction.
798///
799/// \param __m
800/// A 64-bit integer vector of [2 x i32].
801/// \param __count
802/// A 32-bit integer value.
803/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000804/// values. If \a __count is greater or equal to 32, the result is set to all
805/// 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000806static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000807_mm_slli_pi32(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000808{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000809 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000810}
811
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000812/// \brief Left-shifts the first 64-bit integer parameter by the number of bits
813/// specified by the second 64-bit integer parameter. The lower 64 bits of
814/// result are returned.
815///
816/// \headerfile <x86intrin.h>
817///
818/// This intrinsic corresponds to the \c PSLLQ instruction.
819///
820/// \param __m
821/// A 64-bit integer vector interpreted as a single 64-bit integer.
822/// \param __count
823/// A 64-bit integer vector interpreted as a single 64-bit integer.
824/// \returns A 64-bit integer vector containing the left-shifted value. If
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000825/// \a __count is greater or equal to 64, the result is set to 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000826static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000827_mm_sll_si64(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000828{
Craig Topper1aa231e2016-05-16 06:38:42 +0000829 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000830}
831
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000832/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
833/// number of bits specified by the second parameter, which is a 32-bit
834/// integer. The lower 64 bits of result are returned.
835///
836/// \headerfile <x86intrin.h>
837///
838/// This intrinsic corresponds to the \c PSLLQ instruction.
839///
840/// \param __m
841/// A 64-bit integer vector interpreted as a single 64-bit integer.
842/// \param __count
843/// A 32-bit integer value.
844/// \returns A 64-bit integer vector containing the left-shifted value. If
Ekaterina Romanova3494a592016-12-08 23:32:07 +0000845/// \a __count is greater or equal to 64, the result is set to 0.
Michael Kupersteine45af542015-06-30 13:36:19 +0000846static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000847_mm_slli_si64(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000848{
Craig Topper1aa231e2016-05-16 06:38:42 +0000849 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000850}
851
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000852/// \brief Right-shifts each 16-bit integer element of the first parameter,
853/// which is a 64-bit integer vector of [4 x i16], by the number of bits
854/// specified by the second parameter, which is a 64-bit integer. High-order
855/// bits are filled with the sign bit of the initial value of each 16-bit
856/// element. The 16-bit results are packed into a 64-bit integer vector of
857/// [4 x i16].
858///
859/// \headerfile <x86intrin.h>
860///
861/// This intrinsic corresponds to the \c PSRAW instruction.
862///
863/// \param __m
864/// A 64-bit integer vector of [4 x i16].
865/// \param __count
866/// A 64-bit integer vector interpreted as a single 64-bit integer.
867/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
868/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000869static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000870_mm_sra_pi16(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000871{
Sean Silvae4c37602015-09-12 02:55:19 +0000872 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000873}
874
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000875/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
876/// of [4 x i16] by the number of bits specified by a 32-bit integer.
877/// High-order bits are filled with the sign bit of the initial value of each
878/// 16-bit element. The 16-bit results are packed into a 64-bit integer
879/// vector of [4 x i16].
880///
881/// \headerfile <x86intrin.h>
882///
883/// This intrinsic corresponds to the \c PSRAW instruction.
884///
885/// \param __m
886/// A 64-bit integer vector of [4 x i16].
887/// \param __count
888/// A 32-bit integer value.
889/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
890/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000891static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000892_mm_srai_pi16(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000893{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000894 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000895}
896
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000897/// \brief Right-shifts each 32-bit integer element of the first parameter,
898/// which is a 64-bit integer vector of [2 x i32], by the number of bits
899/// specified by the second parameter, which is a 64-bit integer. High-order
900/// bits are filled with the sign bit of the initial value of each 32-bit
901/// element. The 32-bit results are packed into a 64-bit integer vector of
902/// [2 x i32].
903///
904/// \headerfile <x86intrin.h>
905///
906/// This intrinsic corresponds to the \c PSRAD instruction.
907///
908/// \param __m
909/// A 64-bit integer vector of [2 x i32].
910/// \param __count
911/// A 64-bit integer vector interpreted as a single 64-bit integer.
912/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
913/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000914static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000915_mm_sra_pi32(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000916{
Sean Silvae4c37602015-09-12 02:55:19 +0000917 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000918}
919
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000920/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
921/// of [2 x i32] by the number of bits specified by a 32-bit integer.
922/// High-order bits are filled with the sign bit of the initial value of each
923/// 32-bit element. The 32-bit results are packed into a 64-bit integer
924/// vector of [2 x i32].
925///
926/// \headerfile <x86intrin.h>
927///
928/// This intrinsic corresponds to the \c PSRAD instruction.
929///
930/// \param __m
931/// A 64-bit integer vector of [2 x i32].
932/// \param __count
933/// A 32-bit integer value.
934/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
935/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000936static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000937_mm_srai_pi32(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000938{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +0000939 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000940}
941
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000942/// \brief Right-shifts each 16-bit integer element of the first parameter,
943/// which is a 64-bit integer vector of [4 x i16], by the number of bits
944/// specified by the second parameter, which is a 64-bit integer. High-order
945/// bits are cleared. The 16-bit results are packed into a 64-bit integer
946/// vector of [4 x i16].
947///
948/// \headerfile <x86intrin.h>
949///
950/// This intrinsic corresponds to the \c PSRLW instruction.
951///
952/// \param __m
953/// A 64-bit integer vector of [4 x i16].
954/// \param __count
955/// A 64-bit integer vector interpreted as a single 64-bit integer.
956/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
957/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000958static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000959_mm_srl_pi16(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000960{
Sean Silvae4c37602015-09-12 02:55:19 +0000961 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000962}
963
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000964/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
965/// of [4 x i16] by the number of bits specified by a 32-bit integer.
966/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
967/// integer vector of [4 x i16].
968///
969/// \headerfile <x86intrin.h>
970///
971/// This intrinsic corresponds to the \c PSRLW instruction.
972///
973/// \param __m
974/// A 64-bit integer vector of [4 x i16].
975/// \param __count
976/// A 32-bit integer value.
977/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
978/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +0000979static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +0000980_mm_srli_pi16(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +0000981{
Sean Silvae4c37602015-09-12 02:55:19 +0000982 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +0000983}
984
Ekaterina Romanova71a68c92016-06-10 00:10:40 +0000985/// \brief Right-shifts each 32-bit integer element of the first parameter,
986/// which is a 64-bit integer vector of [2 x i32], by the number of bits
987/// specified by the second parameter, which is a 64-bit integer. High-order
988/// bits are cleared. The 32-bit results are packed into a 64-bit integer
989/// vector of [2 x i32].
990///
991/// \headerfile <x86intrin.h>
992///
993/// This intrinsic corresponds to the \c PSRLD instruction.
994///
995/// \param __m
996/// A 64-bit integer vector of [2 x i32].
997/// \param __count
998/// A 64-bit integer vector interpreted as a single 64-bit integer.
999/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1000/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001001static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001002_mm_srl_pi32(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001003{
Sean Silvae4c37602015-09-12 02:55:19 +00001004 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001005}
1006
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001007/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
1008/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1009/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1010/// integer vector of [2 x i32].
1011///
1012/// \headerfile <x86intrin.h>
1013///
1014/// This intrinsic corresponds to the \c PSRLD instruction.
1015///
1016/// \param __m
1017/// A 64-bit integer vector of [2 x i32].
1018/// \param __count
1019/// A 32-bit integer value.
1020/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1021/// values.
Michael Kupersteine45af542015-06-30 13:36:19 +00001022static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001023_mm_srli_pi32(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001024{
Eli Friedmanf0d0e9e2008-05-14 20:32:22 +00001025 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001026}
1027
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001028/// \brief Right-shifts the first 64-bit integer parameter by the number of bits
1029/// specified by the second 64-bit integer parameter. High-order bits are
1030/// cleared.
1031///
1032/// \headerfile <x86intrin.h>
1033///
1034/// This intrinsic corresponds to the \c PSRLQ instruction.
1035///
1036/// \param __m
1037/// A 64-bit integer vector interpreted as a single 64-bit integer.
1038/// \param __count
1039/// A 64-bit integer vector interpreted as a single 64-bit integer.
1040/// \returns A 64-bit integer vector containing the right-shifted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001041static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001042_mm_srl_si64(__m64 __m, __m64 __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001043{
Craig Topper1aa231e2016-05-16 06:38:42 +00001044 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001045}
1046
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001047/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
1048/// number of bits specified by the second parameter, which is a 32-bit
1049/// integer. High-order bits are cleared.
1050///
1051/// \headerfile <x86intrin.h>
1052///
1053/// This intrinsic corresponds to the \c PSRLQ instruction.
1054///
1055/// \param __m
1056/// A 64-bit integer vector interpreted as a single 64-bit integer.
1057/// \param __count
1058/// A 32-bit integer value.
1059/// \returns A 64-bit integer vector containing the right-shifted value.
Michael Kupersteine45af542015-06-30 13:36:19 +00001060static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001061_mm_srli_si64(__m64 __m, int __count)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001062{
Craig Topper1aa231e2016-05-16 06:38:42 +00001063 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001064}
1065
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001066/// \brief Performs a bitwise AND of two 64-bit integer vectors.
1067///
1068/// \headerfile <x86intrin.h>
1069///
1070/// This intrinsic corresponds to the \c PAND instruction.
1071///
1072/// \param __m1
1073/// A 64-bit integer vector.
1074/// \param __m2
1075/// A 64-bit integer vector.
1076/// \returns A 64-bit integer vector containing the bitwise AND of both
1077/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001078static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001079_mm_and_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001080{
Craig Topper1aa231e2016-05-16 06:38:42 +00001081 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001082}
1083
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001084/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
1085/// performs a bitwise AND of the intermediate result and the second 64-bit
1086/// integer vector.
1087///
1088/// \headerfile <x86intrin.h>
1089///
1090/// This intrinsic corresponds to the \c PANDN instruction.
1091///
1092/// \param __m1
1093/// A 64-bit integer vector. The one's complement of this parameter is used
1094/// in the bitwise AND.
1095/// \param __m2
1096/// A 64-bit integer vector.
1097/// \returns A 64-bit integer vector containing the bitwise AND of the second
1098/// parameter and the one's complement of the first parameter.
Michael Kupersteine45af542015-06-30 13:36:19 +00001099static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001100_mm_andnot_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001101{
Craig Topper1aa231e2016-05-16 06:38:42 +00001102 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001103}
1104
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001105/// \brief Performs a bitwise OR of two 64-bit integer vectors.
1106///
1107/// \headerfile <x86intrin.h>
1108///
1109/// This intrinsic corresponds to the \c POR instruction.
1110///
1111/// \param __m1
1112/// A 64-bit integer vector.
1113/// \param __m2
1114/// A 64-bit integer vector.
1115/// \returns A 64-bit integer vector containing the bitwise OR of both
1116/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001117static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001118_mm_or_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001119{
Craig Topper1aa231e2016-05-16 06:38:42 +00001120 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001121}
1122
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001123/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
1124///
1125/// \headerfile <x86intrin.h>
1126///
1127/// This intrinsic corresponds to the \c PXOR instruction.
1128///
1129/// \param __m1
1130/// A 64-bit integer vector.
1131/// \param __m2
1132/// A 64-bit integer vector.
1133/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1134/// parameters.
Michael Kupersteine45af542015-06-30 13:36:19 +00001135static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001136_mm_xor_si64(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001137{
Craig Topper1aa231e2016-05-16 06:38:42 +00001138 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001139}
1140
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001141/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1142/// [8 x i8] to determine if the element of the first vector is equal to the
1143/// corresponding element of the second vector. The comparison yields 0 for
1144/// false, 0xFF for true.
1145///
1146/// \headerfile <x86intrin.h>
1147///
1148/// This intrinsic corresponds to the \c PCMPEQB instruction.
1149///
1150/// \param __m1
1151/// A 64-bit integer vector of [8 x i8].
1152/// \param __m2
1153/// A 64-bit integer vector of [8 x i8].
1154/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1155/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001156static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001157_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001158{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001159 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001160}
1161
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001162/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1163/// [4 x i16] to determine if the element of the first vector is equal to the
1164/// corresponding element of the second vector. The comparison yields 0 for
1165/// false, 0xFFFF for true.
1166///
1167/// \headerfile <x86intrin.h>
1168///
1169/// This intrinsic corresponds to the \c PCMPEQW instruction.
1170///
1171/// \param __m1
1172/// A 64-bit integer vector of [4 x i16].
1173/// \param __m2
1174/// A 64-bit integer vector of [4 x i16].
1175/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1176/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001177static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001178_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001179{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001180 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001181}
1182
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001183/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1184/// [2 x i32] to determine if the element of the first vector is equal to the
1185/// corresponding element of the second vector. The comparison yields 0 for
1186/// false, 0xFFFFFFFF for true.
1187///
1188/// \headerfile <x86intrin.h>
1189///
1190/// This intrinsic corresponds to the \c PCMPEQD instruction.
1191///
1192/// \param __m1
1193/// A 64-bit integer vector of [2 x i32].
1194/// \param __m2
1195/// A 64-bit integer vector of [2 x i32].
1196/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1197/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001198static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001199_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001200{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001201 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001202}
1203
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001204/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1205/// [8 x i8] to determine if the element of the first vector is greater than
1206/// the corresponding element of the second vector. The comparison yields 0
1207/// for false, 0xFF for true.
1208///
1209/// \headerfile <x86intrin.h>
1210///
1211/// This intrinsic corresponds to the \c PCMPGTB instruction.
1212///
1213/// \param __m1
1214/// A 64-bit integer vector of [8 x i8].
1215/// \param __m2
1216/// A 64-bit integer vector of [8 x i8].
1217/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1218/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001219static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001220_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001221{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001222 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001223}
1224
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001225/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1226/// [4 x i16] to determine if the element of the first vector is greater than
1227/// the corresponding element of the second vector. The comparison yields 0
1228/// for false, 0xFFFF for true.
1229///
1230/// \headerfile <x86intrin.h>
1231///
1232/// This intrinsic corresponds to the \c PCMPGTW instruction.
1233///
1234/// \param __m1
1235/// A 64-bit integer vector of [4 x i16].
1236/// \param __m2
1237/// A 64-bit integer vector of [4 x i16].
1238/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1239/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001240static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001241_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001242{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001243 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001244}
1245
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001246/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1247/// [2 x i32] to determine if the element of the first vector is greater than
1248/// the corresponding element of the second vector. The comparison yields 0
1249/// for false, 0xFFFFFFFF for true.
1250///
1251/// \headerfile <x86intrin.h>
1252///
1253/// This intrinsic corresponds to the \c PCMPGTD instruction.
1254///
1255/// \param __m1
1256/// A 64-bit integer vector of [2 x i32].
1257/// \param __m2
1258/// A 64-bit integer vector of [2 x i32].
1259/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1260/// results.
Michael Kupersteine45af542015-06-30 13:36:19 +00001261static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001262_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001263{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001264 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001265}
1266
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001267/// \brief Constructs a 64-bit integer vector initialized to zero.
1268///
1269/// \headerfile <x86intrin.h>
1270///
1271/// This intrinsic corresponds to the the \c VXORPS / XORPS instruction.
1272///
1273/// \returns An initialized 64-bit integer vector with all elements set to zero.
Michael Kupersteine45af542015-06-30 13:36:19 +00001274static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001275_mm_setzero_si64(void)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001276{
1277 return (__m64){ 0LL };
1278}
1279
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001280/// \brief Constructs a 64-bit integer vector initialized with the specified
1281/// 32-bit integer values.
1282///
1283/// \headerfile <x86intrin.h>
1284///
1285/// This intrinsic is a utility function and does not correspond to a specific
1286/// instruction.
1287///
1288/// \param __i1
1289/// A 32-bit integer value used to initialize the upper 32 bits of the
1290/// result.
1291/// \param __i0
1292/// A 32-bit integer value used to initialize the lower 32 bits of the
1293/// result.
1294/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001295static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001296_mm_set_pi32(int __i1, int __i0)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001297{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001298 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001299}
1300
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001301/// \brief Constructs a 64-bit integer vector initialized with the specified
1302/// 16-bit integer values.
1303///
1304/// \headerfile <x86intrin.h>
1305///
1306/// This intrinsic is a utility function and does not correspond to a specific
1307/// instruction.
1308///
1309/// \param __s3
1310/// A 16-bit integer value used to initialize bits [63:48] of the result.
1311/// \param __s2
1312/// A 16-bit integer value used to initialize bits [47:32] of the result.
1313/// \param __s1
1314/// A 16-bit integer value used to initialize bits [31:16] of the result.
1315/// \param __s0
1316/// A 16-bit integer value used to initialize bits [15:0] of the result.
1317/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001318static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001319_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001320{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001321 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001322}
1323
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001324/// \brief Constructs a 64-bit integer vector initialized with the specified
1325/// 8-bit integer values.
1326///
1327/// \headerfile <x86intrin.h>
1328///
1329/// This intrinsic is a utility function and does not correspond to a specific
1330/// instruction.
1331///
1332/// \param __b7
1333/// An 8-bit integer value used to initialize bits [63:56] of the result.
1334/// \param __b6
1335/// An 8-bit integer value used to initialize bits [55:48] of the result.
1336/// \param __b5
1337/// An 8-bit integer value used to initialize bits [47:40] of the result.
1338/// \param __b4
1339/// An 8-bit integer value used to initialize bits [39:32] of the result.
1340/// \param __b3
1341/// An 8-bit integer value used to initialize bits [31:24] of the result.
1342/// \param __b2
1343/// An 8-bit integer value used to initialize bits [23:16] of the result.
1344/// \param __b1
1345/// An 8-bit integer value used to initialize bits [15:8] of the result.
1346/// \param __b0
1347/// An 8-bit integer value used to initialize bits [7:0] of the result.
1348/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001349static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001350_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1351 char __b1, char __b0)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001352{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001353 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1354 __b4, __b5, __b6, __b7);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001355}
1356
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001357/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
1358/// 32-bit integer vector elements set to the specified 32-bit integer
1359/// value.
1360///
1361/// \headerfile <x86intrin.h>
1362///
1363/// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction.
1364///
1365/// \param __i
1366/// A 32-bit integer value used to initialize each vector element of the
1367/// result.
1368/// \returns An initialized 64-bit integer vector of [2 x i32].
Michael Kupersteine45af542015-06-30 13:36:19 +00001369static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001370_mm_set1_pi32(int __i)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001371{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001372 return _mm_set_pi32(__i, __i);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001373}
1374
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001375/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
1376/// 16-bit integer vector elements set to the specified 16-bit integer
1377/// value.
1378///
1379/// \headerfile <x86intrin.h>
1380///
1381/// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction.
1382///
1383/// \param __w
1384/// A 16-bit integer value used to initialize each vector element of the
1385/// result.
1386/// \returns An initialized 64-bit integer vector of [4 x i16].
Michael Kupersteine45af542015-06-30 13:36:19 +00001387static __inline__ __m64 __DEFAULT_FN_ATTRS
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001388_mm_set1_pi16(short __w)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001389{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001390 return _mm_set_pi16(__w, __w, __w, __w);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001391}
1392
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001393/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
1394/// 8-bit integer vector elements set to the specified 8-bit integer value.
1395///
1396/// \headerfile <x86intrin.h>
1397///
1398/// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW +
1399/// PSHUFLW instruction.
1400///
1401/// \param __b
1402/// An 8-bit integer value used to initialize each vector element of the
1403/// result.
1404/// \returns An initialized 64-bit integer vector of [8 x i8].
Michael Kupersteine45af542015-06-30 13:36:19 +00001405static __inline__ __m64 __DEFAULT_FN_ATTRS
Mike Stumpeff0cc92009-02-14 18:02:21 +00001406_mm_set1_pi8(char __b)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001407{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001408 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001409}
1410
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001411/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1412/// the specified 32-bit integer values.
1413///
1414/// \headerfile <x86intrin.h>
1415///
1416/// This intrinsic is a utility function and does not correspond to a specific
1417/// instruction.
1418///
1419/// \param __i0
1420/// A 32-bit integer value used to initialize the lower 32 bits of the
1421/// result.
1422/// \param __i1
1423/// A 32-bit integer value used to initialize the upper 32 bits of the
1424/// result.
1425/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001426static __inline__ __m64 __DEFAULT_FN_ATTRS
Eli Friedmancb59baa2011-05-05 20:21:54 +00001427_mm_setr_pi32(int __i0, int __i1)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001428{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001429 return _mm_set_pi32(__i1, __i0);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001430}
1431
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001432/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1433/// the specified 16-bit integer values.
1434///
1435/// \headerfile <x86intrin.h>
1436///
1437/// This intrinsic is a utility function and does not correspond to a specific
1438/// instruction.
1439///
1440/// \param __w0
1441/// A 16-bit integer value used to initialize bits [15:0] of the result.
1442/// \param __w1
1443/// A 16-bit integer value used to initialize bits [31:16] of the result.
1444/// \param __w2
1445/// A 16-bit integer value used to initialize bits [47:32] of the result.
1446/// \param __w3
1447/// A 16-bit integer value used to initialize bits [63:48] of the result.
1448/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001449static __inline__ __m64 __DEFAULT_FN_ATTRS
Eli Friedmancb59baa2011-05-05 20:21:54 +00001450_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001451{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001452 return _mm_set_pi16(__w3, __w2, __w1, __w0);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001453}
1454
Ekaterina Romanova71a68c92016-06-10 00:10:40 +00001455/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1456/// the specified 8-bit integer values.
1457///
1458/// \headerfile <x86intrin.h>
1459///
1460/// This intrinsic is a utility function and does not correspond to a specific
1461/// instruction.
1462///
1463/// \param __b0
1464/// An 8-bit integer value used to initialize bits [7:0] of the result.
1465/// \param __b1
1466/// An 8-bit integer value used to initialize bits [15:8] of the result.
1467/// \param __b2
1468/// An 8-bit integer value used to initialize bits [23:16] of the result.
1469/// \param __b3
1470/// An 8-bit integer value used to initialize bits [31:24] of the result.
1471/// \param __b4
1472/// An 8-bit integer value used to initialize bits [39:32] of the result.
1473/// \param __b5
1474/// An 8-bit integer value used to initialize bits [47:40] of the result.
1475/// \param __b6
1476/// An 8-bit integer value used to initialize bits [55:48] of the result.
1477/// \param __b7
1478/// An 8-bit integer value used to initialize bits [63:56] of the result.
1479/// \returns An initialized 64-bit integer vector.
Michael Kupersteine45af542015-06-30 13:36:19 +00001480static __inline__ __m64 __DEFAULT_FN_ATTRS
Eli Friedmancb59baa2011-05-05 20:21:54 +00001481_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1482 char __b6, char __b7)
Anders Carlssona5e2e602008-03-03 19:29:06 +00001483{
Dale Johannesen39d6f4b2010-09-30 23:57:50 +00001484 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
Anders Carlssona5e2e602008-03-03 19:29:06 +00001485}
1486
/* The attribute shorthand is private to this header; do not leak it. */
#undef __DEFAULT_FN_ATTRS

/* Aliases for compatibility: instruction-style _m_* names mapped onto the
 * corresponding _mm_* intrinsics. */

/* State management and scalar conversions. */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* Pack and unpack. */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* Addition. */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* Subtraction. */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* Multiplication. */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* Shifts. */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* Bitwise logic. */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* Comparisons. */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1547
Anders Carlssona5e2e602008-03-03 19:29:06 +00001548#endif /* __MMINTRIN_H */
1549