Blame - clang/lib/Headers/emmintrin.h - toolchain/llvm-project

blob: 5ac89dca1d5ec6c765bcf9e405282039a7b3ea2b [file] [log] [blame]

Benjamin Kramer	ae8ea1f	2010-08-20 16:47:17 +0000	[diff] [blame]	1	/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	2	*
				3	* Permission is hereby granted, free of charge, to any person obtaining a copy
				4	* of this software and associated documentation files (the "Software"), to deal
				5	* in the Software without restriction, including without limitation the rights
				6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				7	* copies of the Software, and to permit persons to whom the Software is
				8	* furnished to do so, subject to the following conditions:
				9	*
				10	* The above copyright notice and this permission notice shall be included in
				11	* all copies or substantial portions of the Software.
				12	*
				13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				19	* THE SOFTWARE.
				20	*
				21	*===-----------------------------------------------------------------------===
				22	*/
Benjamin Kramer	ae8ea1f	2010-08-20 16:47:17 +0000	[diff] [blame]	23
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	24	#ifndef __EMMINTRIN_H
				25	#define __EMMINTRIN_H
				26
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	27	#include <xmmintrin.h>
				28
				29	typedef double __m128d __attribute__((__vector_size__(16)));
				30	typedef long long __m128i __attribute__((__vector_size__(16)));
				31
Eric Christopher	2a9898f	2010-08-26 02:09:25 +0000	[diff] [blame]	32	/* Type defines. */
				33	typedef double __v2df __attribute__ ((__vector_size__ (16)));
				34	typedef long long __v2di __attribute__ ((__vector_size__ (16)));
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	35	typedef short __v8hi __attribute__((__vector_size__(16)));
Anders Carlsson	327c8df	2009-09-18 19:18:19 +0000	[diff] [blame]	36	typedef char __v16qi __attribute__((__vector_size__(16)));
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	37
Simon Pilgrim	6d1a0c4	2016-05-29 18:49:08 +0000	[diff] [blame]	38	/* Unsigned types */
				39	typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
Simon Pilgrim	6d1a0c4	2016-05-29 18:49:08 +0000	[diff] [blame]	40	typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
				41	typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
				42
Chandler Carruth	cbe6411	2015-10-01 23:40:12 +0000	[diff] [blame]	43	/* We need an explicitly signed variant for char. Note that this shouldn't
				44	* appear in the interface though. */
				45	typedef signed char __v16qs __attribute__((__vector_size__(16)));
				46
Michael Kuperstein	a10dff9	2015-09-21 13:34:47 +0000	[diff] [blame]	47	#include <f16cintrin.h>
				48
Eric Christopher	4d185168	2015-06-17 07:09:20 +0000	[diff] [blame]	49	/* Define the default attributes for the functions in this file. */
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	50	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
Eric Christopher	4d185168	2015-06-17 07:09:20 +0000	[diff] [blame]	51
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	52	/// \brief Adds lower double-precision values in both operands and returns the
				53	/// sum in the lower 64 bits of the result. The upper 64 bits of the result
				54	/// are copied from the upper double-precision value of the first operand.
				55	///
				56	/// \headerfile <x86intrin.h>
				57	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	58	/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	59	///
				60	/// \param __a
				61	/// A 128-bit vector of [2 x double] containing one of the source operands.
				62	/// \param __b
				63	/// A 128-bit vector of [2 x double] containing one of the source operands.
				64	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				65	/// sum of the lower 64 bits of both operands. The upper 64 bits are copied
				66	/// from the upper 64 bits of the first source operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	67	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	68	_mm_add_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	69	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	70	__a[0] += __b[0];
				71	return __a;
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	72	}
				73
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	74	/// \brief Adds two 128-bit vectors of [2 x double].
				75	///
				76	/// \headerfile <x86intrin.h>
				77	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	78	/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	79	///
				80	/// \param __a
				81	/// A 128-bit vector of [2 x double] containing one of the source operands.
				82	/// \param __b
				83	/// A 128-bit vector of [2 x double] containing one of the source operands.
				84	/// \returns A 128-bit vector of [2 x double] containing the sums of both
				85	/// operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	86	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	87	_mm_add_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	88	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	89	return (__m128d)((__v2df)__a + (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	90	}
				91
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	92	/// \brief Subtracts the lower double-precision value of the second operand
				93	/// from the lower double-precision value of the first operand and returns
				94	/// the difference in the lower 64 bits of the result. The upper 64 bits of
				95	/// the result are copied from the upper double-precision value of the first
				96	/// operand.
				97	///
				98	/// \headerfile <x86intrin.h>
				99	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	100	/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	101	///
				102	/// \param __a
				103	/// A 128-bit vector of [2 x double] containing the minuend.
				104	/// \param __b
				105	/// A 128-bit vector of [2 x double] containing the subtrahend.
				106	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				107	/// difference of the lower 64 bits of both operands. The upper 64 bits are
				108	/// copied from the upper 64 bits of the first source operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	109	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	110	_mm_sub_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	111	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	112	__a[0] -= __b[0];
				113	return __a;
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	114	}
				115
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	116	/// \brief Subtracts two 128-bit vectors of [2 x double].
				117	///
				118	/// \headerfile <x86intrin.h>
				119	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	120	/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	121	///
				122	/// \param __a
				123	/// A 128-bit vector of [2 x double] containing the minuend.
				124	/// \param __b
				125	/// A 128-bit vector of [2 x double] containing the subtrahend.
				126	/// \returns A 128-bit vector of [2 x double] containing the differences between
				127	/// both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	128	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	129	_mm_sub_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	130	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	131	return (__m128d)((__v2df)__a - (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	132	}
				133
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	134	/// \brief Multiplies lower double-precision values in both operands and returns
				135	/// the product in the lower 64 bits of the result. The upper 64 bits of the
				136	/// result are copied from the upper double-precision value of the first
				137	/// operand.
				138	///
				139	/// \headerfile <x86intrin.h>
				140	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	141	/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	142	///
				143	/// \param __a
				144	/// A 128-bit vector of [2 x double] containing one of the source operands.
				145	/// \param __b
				146	/// A 128-bit vector of [2 x double] containing one of the source operands.
				147	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				148	/// product of the lower 64 bits of both operands. The upper 64 bits are
				149	/// copied from the upper 64 bits of the first source operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	150	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	151	_mm_mul_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	152	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	153	__a[0] *= __b[0];
				154	return __a;
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	155	}
				156
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	157	/// \brief Multiplies two 128-bit vectors of [2 x double].
				158	///
				159	/// \headerfile <x86intrin.h>
				160	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	161	/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	162	///
				163	/// \param __a
				164	/// A 128-bit vector of [2 x double] containing one of the operands.
				165	/// \param __b
				166	/// A 128-bit vector of [2 x double] containing one of the operands.
				167	/// \returns A 128-bit vector of [2 x double] containing the products of both
				168	/// operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	169	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	170	_mm_mul_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	171	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	172	return (__m128d)((__v2df)__a * (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	173	}
				174
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	175	/// \brief Divides the lower double-precision value of the first operand by the
				176	/// lower double-precision value of the second operand and returns the
				177	/// quotient in the lower 64 bits of the result. The upper 64 bits of the
				178	/// result are copied from the upper double-precision value of the first
				179	/// operand.
				180	///
				181	/// \headerfile <x86intrin.h>
				182	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	183	/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	184	///
				185	/// \param __a
				186	/// A 128-bit vector of [2 x double] containing the dividend.
				187	/// \param __b
				188	/// A 128-bit vector of [2 x double] containing the divisor.
				189	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				190	/// quotient of the lower 64 bits of both operands. The upper 64 bits are
				191	/// copied from the upper 64 bits of the first source operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	192	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	193	_mm_div_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	194	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	195	__a[0] /= __b[0];
				196	return __a;
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	197	}
				198
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	199	/// \brief Performs an element-by-element division of two 128-bit vectors of
				200	/// [2 x double].
				201	///
				202	/// \headerfile <x86intrin.h>
				203	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	204	/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	205	///
				206	/// \param __a
				207	/// A 128-bit vector of [2 x double] containing the dividend.
				208	/// \param __b
				209	/// A 128-bit vector of [2 x double] containing the divisor.
				210	/// \returns A 128-bit vector of [2 x double] containing the quotients of both
				211	/// operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	212	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	213	_mm_div_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	214	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	215	return (__m128d)((__v2df)__a / (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	216	}
				217
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	218	/// \brief Calculates the square root of the lower double-precision value of
				219	/// the second operand and returns it in the lower 64 bits of the result.
				220	/// The upper 64 bits of the result are copied from the upper double-
				221	/// precision value of the first operand.
				222	///
				223	/// \headerfile <x86intrin.h>
				224	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	225	/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	226	///
				227	/// \param __a
				228	/// A 128-bit vector of [2 x double] containing one of the operands. The
				229	/// upper 64 bits of this operand are copied to the upper 64 bits of the
				230	/// result.
				231	/// \param __b
				232	/// A 128-bit vector of [2 x double] containing one of the operands. The
				233	/// square root is calculated using the lower 64 bits of this operand.
				234	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	235	/// square root of the lower 64 bits of operand \a __b, and whose upper 64
				236	/// bits are copied from the upper 64 bits of operand \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	237	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	238	_mm_sqrt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	239	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	240	__m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	241	return (__m128d) { __c[0], __a[1] };
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	242	}
				243
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	244	/// \brief Calculates the square root of each of the two values stored in a
				245	/// 128-bit vector of [2 x double].
				246	///
				247	/// \headerfile <x86intrin.h>
				248	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	249	/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	250	///
				251	/// \param __a
				252	/// A 128-bit vector of [2 x double].
				253	/// \returns A 128-bit vector of [2 x double] containing the square roots of the
				254	/// values in the operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	255	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	256	_mm_sqrt_pd(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	257	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	258	return __builtin_ia32_sqrtpd((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	259	}
				260
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	261	/// \brief Compares lower 64-bit double-precision values of both operands, and
				262	/// returns the lesser of the pair of values in the lower 64 bits of the
				263	/// result. The upper 64 bits of the result are copied from the upper double-
				264	/// precision value of the first operand.
				265	///
				266	/// \headerfile <x86intrin.h>
				267	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	268	/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	269	///
				270	/// \param __a
				271	/// A 128-bit vector of [2 x double] containing one of the operands. The
				272	/// lower 64 bits of this operand are used in the comparison.
				273	/// \param __b
				274	/// A 128-bit vector of [2 x double] containing one of the operands. The
				275	/// lower 64 bits of this operand are used in the comparison.
				276	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				277	/// minimum value between both operands. The upper 64 bits are copied from
				278	/// the upper 64 bits of the first source operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	279	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	280	_mm_min_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	281	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	282	return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	283	}
				284
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	285	/// \brief Performs element-by-element comparison of the two 128-bit vectors of
				286	/// [2 x double] and returns the vector containing the lesser of each pair of
				287	/// values.
				288	///
				289	/// \headerfile <x86intrin.h>
				290	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	291	/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	292	///
				293	/// \param __a
				294	/// A 128-bit vector of [2 x double] containing one of the operands.
				295	/// \param __b
				296	/// A 128-bit vector of [2 x double] containing one of the operands.
				297	/// \returns A 128-bit vector of [2 x double] containing the minimum values
				298	/// between both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	299	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	300	_mm_min_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	301	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	302	return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	303	}
				304
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	305	/// \brief Compares lower 64-bit double-precision values of both operands, and
				306	/// returns the greater of the pair of values in the lower 64 bits of the
				307	/// result. The upper 64 bits of the result are copied from the upper double-
				308	/// precision value of the first operand.
				309	///
				310	/// \headerfile <x86intrin.h>
				311	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	312	/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	313	///
				314	/// \param __a
				315	/// A 128-bit vector of [2 x double] containing one of the operands. The
				316	/// lower 64 bits of this operand are used in the comparison.
				317	/// \param __b
				318	/// A 128-bit vector of [2 x double] containing one of the operands. The
				319	/// lower 64 bits of this operand are used in the comparison.
				320	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				321	/// maximum value between both operands. The upper 64 bits are copied from
				322	/// the upper 64 bits of the first source operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	323	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	324	_mm_max_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	325	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	326	return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	327	}
				328
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	329	/// \brief Performs element-by-element comparison of the two 128-bit vectors of
				330	/// [2 x double] and returns the vector containing the greater of each pair
				331	/// of values.
				332	///
				333	/// \headerfile <x86intrin.h>
				334	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	335	/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	336	///
				337	/// \param __a
				338	/// A 128-bit vector of [2 x double] containing one of the operands.
				339	/// \param __b
				340	/// A 128-bit vector of [2 x double] containing one of the operands.
				341	/// \returns A 128-bit vector of [2 x double] containing the maximum values
				342	/// between both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	343	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	344	_mm_max_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	345	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	346	return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	347	}
				348
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	349	/// \brief Performs a bitwise AND of two 128-bit vectors of [2 x double].
				350	///
				351	/// \headerfile <x86intrin.h>
				352	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	353	/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	354	///
				355	/// \param __a
				356	/// A 128-bit vector of [2 x double] containing one of the source operands.
				357	/// \param __b
				358	/// A 128-bit vector of [2 x double] containing one of the source operands.
				359	/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
				360	/// values between both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	361	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	362	_mm_and_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	363	{
Craig Topper	d0681d5	2016-08-31 05:38:55 +0000	[diff] [blame]	364	return (__m128d)((__v2du)__a & (__v2du)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	365	}
				366
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	367	/// \brief Performs a bitwise AND of two 128-bit vectors of [2 x double], using
				368	/// the one's complement of the values contained in the first source operand.
				369	///
				370	/// \headerfile <x86intrin.h>
				371	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	372	/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	373	///
				374	/// \param __a
				375	/// A 128-bit vector of [2 x double] containing the left source operand. The
				376	/// one's complement of this value is used in the bitwise AND.
				377	/// \param __b
				378	/// A 128-bit vector of [2 x double] containing the right source operand.
				379	/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
				380	/// values in the second operand and the one's complement of the first
				381	/// operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	382	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	383	_mm_andnot_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	384	{
Craig Topper	d0681d5	2016-08-31 05:38:55 +0000	[diff] [blame]	385	return (__m128d)(~(__v2du)__a & (__v2du)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	386	}
				387
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	388	/// \brief Performs a bitwise OR of two 128-bit vectors of [2 x double].
				389	///
				390	/// \headerfile <x86intrin.h>
				391	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	392	/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	393	///
				394	/// \param __a
				395	/// A 128-bit vector of [2 x double] containing one of the source operands.
				396	/// \param __b
				397	/// A 128-bit vector of [2 x double] containing one of the source operands.
				398	/// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the
				399	/// values between both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	400	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	401	_mm_or_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	402	{
Craig Topper	d0681d5	2016-08-31 05:38:55 +0000	[diff] [blame]	403	return (__m128d)((__v2du)__a | (__v2du)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	404	}
				405
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	406	/// \brief Performs a bitwise XOR of two 128-bit vectors of [2 x double].
				407	///
				408	/// \headerfile <x86intrin.h>
				409	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	410	/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	411	///
				412	/// \param __a
				413	/// A 128-bit vector of [2 x double] containing one of the source operands.
				414	/// \param __b
				415	/// A 128-bit vector of [2 x double] containing one of the source operands.
				416	/// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the
				417	/// values between both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	418	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	419	_mm_xor_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	420	{
Craig Topper	d0681d5	2016-08-31 05:38:55 +0000	[diff] [blame]	421	return (__m128d)((__v2du)__a ^ (__v2du)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	422	}
				423
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	424	/// \brief Compares each of the corresponding double-precision values of the
				425	/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0h
				426	/// for false, FFFFFFFFFFFFFFFFh for true.
				427	///
				428	/// \headerfile <x86intrin.h>
				429	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	430	/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	431	///
				432	/// \param __a
				433	/// A 128-bit vector of [2 x double].
				434	/// \param __b
				435	/// A 128-bit vector of [2 x double].
				436	/// \returns A 128-bit vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	437	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	438	_mm_cmpeq_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	439	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	440	return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	441	}
				442
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	443	/// \brief Compares each of the corresponding double-precision values of the
				444	/// 128-bit vectors of [2 x double] to determine if the values in the first
				445	/// operand are less than those in the second operand. Each comparison
				446	/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				447	///
				448	/// \headerfile <x86intrin.h>
				449	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	450	/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	451	///
				452	/// \param __a
				453	/// A 128-bit vector of [2 x double].
				454	/// \param __b
				455	/// A 128-bit vector of [2 x double].
				456	/// \returns A 128-bit vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	457	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	458	_mm_cmplt_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	459	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	460	return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	461	}
				462
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	463	/// \brief Compares each of the corresponding double-precision values of the
				464	/// 128-bit vectors of [2 x double] to determine if the values in the first
				465	/// operand are less than or equal to those in the second operand. Each
				466	/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				467	///
				468	/// \headerfile <x86intrin.h>
				469	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	470	/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	471	///
				472	/// \param __a
				473	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				474	/// \param __b
				475	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				476	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	477	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	478	_mm_cmple_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	479	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	480	return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	481	}
				482
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	483	/// \brief Compares each of the corresponding double-precision values of the
				484	/// 128-bit vectors of [2 x double] to determine if the values in the first
				485	/// operand are greater than those in the second operand. Each comparison
				486	/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				487	///
				488	/// \headerfile <x86intrin.h>
				489	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	490	/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	491	///
				492	/// \param __a
				493	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				494	/// \param __b
				495	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				496	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	497	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	498	_mm_cmpgt_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	499	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	500	return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); // a > b is evaluated as b < a, hence the swapped arguments
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	501	}
				502
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	503	/// \brief Compares each of the corresponding double-precision values of the
				504	/// 128-bit vectors of [2 x double] to determine if the values in the first
				505	/// operand are greater than or equal to those in the second operand. Each
				506	/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				507	///
				508	/// \headerfile <x86intrin.h>
				509	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	510	/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	511	///
				512	/// \param __a
				513	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				514	/// \param __b
				515	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				516	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	517	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	518	_mm_cmpge_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	519	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	520	return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); // a >= b is evaluated as b <= a, hence the swapped arguments
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	521	}
				522
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	523	/// \brief Compares each of the corresponding double-precision values of the
				524	/// 128-bit vectors of [2 x double] to determine if the values in the first
				525	/// operand are ordered with respect to those in the second operand. A pair
				526	/// of double-precision values is "ordered" with respect to each other if
				527	/// neither value is a NaN. Each comparison yields 0h for false,
				528	/// FFFFFFFFFFFFFFFFh for true.
				529	///
				530	/// \headerfile <x86intrin.h>
				531	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	532	/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	533	///
				534	/// \param __a
				535	/// A 128-bit vector of [2 x double]; the first operand.
				536	/// \param __b
				537	/// A 128-bit vector of [2 x double]; the second operand.
				538	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	539	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	540	_mm_cmpord_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	541	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	542	return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	543	}
				544
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	545	/// \brief Compares each of the corresponding double-precision values of the
				546	/// 128-bit vectors of [2 x double] to determine if the values in the first
				547	/// operand are unordered with respect to those in the second operand. A pair
				548	/// of double-precision values is "unordered" with respect to each other if
				549	/// one or both values are NaN. Each comparison yields 0h for false,
				550	/// FFFFFFFFFFFFFFFFh for true.
				551	///
				552	/// \headerfile <x86intrin.h>
				553	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	554	/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
				555	/// instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	556	///
				557	/// \param __a
				558	/// A 128-bit vector of [2 x double]; the first operand.
				559	/// \param __b
				560	/// A 128-bit vector of [2 x double]; the second operand.
				561	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	562	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	563	_mm_cmpunord_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	564	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	565	return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	566	}
				567
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	568	/// \brief Compares each of the corresponding double-precision values of the
				569	/// 128-bit vectors of [2 x double] to determine if the values in the first
				570	/// operand are unequal to those in the second operand. Each comparison
				571	/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				572	///
				573	/// \headerfile <x86intrin.h>
				574	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	575	/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	576	///
				577	/// \param __a
				578	/// A 128-bit vector of [2 x double]; the first operand.
				579	/// \param __b
				580	/// A 128-bit vector of [2 x double]; the second operand.
				581	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	582	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	583	_mm_cmpneq_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	584	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	585	return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	586	}
				587
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	588	/// \brief Compares each of the corresponding double-precision values of the
				589	/// 128-bit vectors of [2 x double] to determine if the values in the first
				590	/// operand are not less than those in the second operand. Each comparison
				591	/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				592	///
				593	/// \headerfile <x86intrin.h>
				594	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	595	/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	596	///
				597	/// \param __a
				598	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				599	/// \param __b
				600	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				601	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	602	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	603	_mm_cmpnlt_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	604	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	605	return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	606	}
				607
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	608	/// \brief Compares each of the corresponding double-precision values of the
				609	/// 128-bit vectors of [2 x double] to determine if the values in the first
				610	/// operand are not less than or equal to those in the second operand. Each
				611	/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				612	///
				613	/// \headerfile <x86intrin.h>
				614	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	615	/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	616	///
				617	/// \param __a
				618	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				619	/// \param __b
				620	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				621	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	622	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	623	_mm_cmpnle_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	624	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	625	return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	626	}
				627
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	628	/// \brief Compares each of the corresponding double-precision values of the
				629	/// 128-bit vectors of [2 x double] to determine if the values in the first
				630	/// operand are not greater than those in the second operand. Each
				631	/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				632	///
				633	/// \headerfile <x86intrin.h>
				634	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	635	/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	636	///
				637	/// \param __a
				638	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				639	/// \param __b
				640	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				641	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	642	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	643	_mm_cmpngt_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	644	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	645	return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); // !(a > b) is evaluated as !(b < a), hence the swapped arguments
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	646	}
				647
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	648	/// \brief Compares each of the corresponding double-precision values of the
				649	/// 128-bit vectors of [2 x double] to determine if the values in the first
				650	/// operand are not greater than or equal to those in the second operand.
				651	/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				652	///
				653	/// \headerfile <x86intrin.h>
				654	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	655	/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	656	///
				657	/// \param __a
				658	/// A 128-bit vector of [2 x double]; the first (left-hand) operand.
				659	/// \param __b
				660	/// A 128-bit vector of [2 x double]; the second (right-hand) operand.
				661	/// \returns A 128-bit vector holding the two element-wise comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	662	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	663	_mm_cmpnge_pd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	664	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	665	return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); // !(a >= b) is evaluated as !(b <= a), hence the swapped arguments
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	666	}
				667
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	668	/// \brief Compares the lower double-precision floating-point values in each of
				669	/// the two 128-bit floating-point vectors of [2 x double] for equality. The
				670	/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
				671	///
				672	/// \headerfile <x86intrin.h>
				673	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	674	/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	675	///
				676	/// \param __a
				677	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	678	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	679	/// \param __b
				680	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	681	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	682	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	683	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	684	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	685	_mm_cmpeq_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	686	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	687	return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	688	}
				689
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	690	/// \brief Compares the lower double-precision floating-point values in each of
				691	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				692	/// the value in the first parameter is less than the corresponding value in
				693	/// the second parameter. The comparison yields 0h for false,
				694	/// FFFFFFFFFFFFFFFFh for true.
				695	///
				696	/// \headerfile <x86intrin.h>
				697	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	698	/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	699	///
				700	/// \param __a
				701	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	702	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	703	/// \param __b
				704	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	705	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	706	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	707	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	708	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	709	_mm_cmplt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	710	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	711	return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	712	}
				713
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	714	/// \brief Compares the lower double-precision floating-point values in each of
				715	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				716	/// the value in the first parameter is less than or equal to the
				717	/// corresponding value in the second parameter. The comparison yields 0h for
				718	/// false, FFFFFFFFFFFFFFFFh for true.
				719	///
				720	/// \headerfile <x86intrin.h>
				721	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	722	/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	723	///
				724	/// \param __a
				725	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	726	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	727	/// \param __b
				728	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	729	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	730	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	731	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	732	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	733	_mm_cmple_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	734	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	735	return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	736	}
				737
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	738	/// \brief Compares the lower double-precision floating-point values in each of
				739	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				740	/// the value in the first parameter is greater than the corresponding value
				741	/// in the second parameter. The comparison yields 0h for false,
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	742	/// FFFFFFFFFFFFFFFFh for true.
				743	///
				744	/// \headerfile <x86intrin.h>
				745	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	746	/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	747	///
				748	/// \param __a
				749	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	750	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	751	/// \param __b
				752	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	753	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	754	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	755	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	756	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	757	_mm_cmpgt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	758	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	759	__m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); // a > b is evaluated as b < a, hence the swapped arguments
Manman Ren	9bb34d6	2013-06-17 19:42:49 +0000	[diff] [blame]	760	return (__m128d) { __c[0], __a[1] }; // low element: mask from __c; high element: taken from __a so the documented upper half holds despite the swap
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	761	}
				762
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	763	/// \brief Compares the lower double-precision floating-point values in each of
				764	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				765	/// the value in the first parameter is greater than or equal to the
				766	/// corresponding value in the second parameter. The comparison yields 0h for
				767	/// false, FFFFFFFFFFFFFFFFh for true.
				768	///
				769	/// \headerfile <x86intrin.h>
				770	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	771	/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	772	///
				773	/// \param __a
				774	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	775	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	776	/// \param __b
				777	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	778	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	779	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	780	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	781	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	782	_mm_cmpge_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	783	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	784	__m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); // a >= b is evaluated as b <= a, hence the swapped arguments
Manman Ren	9bb34d6	2013-06-17 19:42:49 +0000	[diff] [blame]	785	return (__m128d) { __c[0], __a[1] }; // low element: mask from __c; high element: taken from __a so the documented upper half holds despite the swap
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	786	}
				787
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	788	/// \brief Compares the lower double-precision floating-point values in each of
				789	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				790	/// the value in the first parameter is "ordered" with respect to the
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	791	/// corresponding value in the second parameter. The comparison yields 0h for
				792	/// false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values is
				793	/// "ordered" with respect to each other if neither value is a NaN.
				794	///
				795	/// \headerfile <x86intrin.h>
				796	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	797	/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	798	///
				799	/// \param __a
				800	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	801	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	802	/// \param __b
				803	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	804	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	805	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	806	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	807	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	808	_mm_cmpord_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	809	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	810	return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	811	}
				812
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	813	/// \brief Compares the lower double-precision floating-point values in each of
				814	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				815	/// the value in the first parameter is "unordered" with respect to the
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	816	/// corresponding value in the second parameter. The comparison yields 0h
				817	/// for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
				818	/// is "unordered" with respect to each other if one or both values are NaN.
				819	///
				820	/// \headerfile <x86intrin.h>
				821	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	822	/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
				823	/// instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	824	///
				825	/// \param __a
				826	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	827	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	828	/// \param __b
				829	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	830	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	831	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	832	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	833	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	834	_mm_cmpunord_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	835	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	836	return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	837	}
				838
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	839	/// \brief Compares the lower double-precision floating-point values in each of
				840	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				841	/// the value in the first parameter is unequal to the corresponding value in
				842	/// the second parameter. The comparison yields 0h for false,
				843	/// FFFFFFFFFFFFFFFFh for true.
				844	///
				845	/// \headerfile <x86intrin.h>
				846	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	847	/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	848	///
				849	/// \param __a
				850	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	851	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	852	/// \param __b
				853	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	854	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	855	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	856	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	857	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	858	_mm_cmpneq_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	859	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	860	return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	861	}
				862
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	863	/// \brief Compares the lower double-precision floating-point values in each of
				864	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				865	/// the value in the first parameter is not less than the corresponding
				866	/// value in the second parameter. The comparison yields 0h for false,
				867	/// FFFFFFFFFFFFFFFFh for true.
				868	///
				869	/// \headerfile <x86intrin.h>
				870	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	871	/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	872	///
				873	/// \param __a
				874	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	875	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	876	/// \param __b
				877	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	878	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	879	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	880	/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	881	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	882	_mm_cmpnlt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	883	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	884	return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	885	}
				886
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	887	/// \brief Compares the lower double-precision floating-point values in each of
				888	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				889	/// the value in the first parameter is not less than or equal to the
				890	/// corresponding value in the second parameter. The comparison yields 0h
				891	/// for false, FFFFFFFFFFFFFFFFh for true.
				892	///
				893	/// \headerfile <x86intrin.h>
				894	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	895	/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	896	///
				897	/// \param __a
				898	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	899	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	900	/// \param __b
				901	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	902	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	903	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	904	/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	905	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	906	_mm_cmpnle_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	907	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	908	return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	909	}
				910
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	911	/// \brief Compares the lower double-precision floating-point values in each of
				912	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				913	/// the value in the first parameter is not greater than the corresponding
				914	/// value in the second parameter. The comparison yields 0h for false,
				915	/// FFFFFFFFFFFFFFFFh for true.
				916	///
				917	/// \headerfile <x86intrin.h>
				918	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	919	/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	920	///
				921	/// \param __a
				922	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	923	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	924	/// \param __b
				925	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	926	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	927	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	928	/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	929	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	930	_mm_cmpngt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	931	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	932	__m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); /* operands swapped: "a not greater than b" is evaluated as "b not less than a" */
Manman Ren	9bb34d6	2013-06-17 19:42:49 +0000	[diff] [blame]	933	return (__m128d) { __c[0], __a[1] }; /* take the upper element from __a rather than from the swapped-operand result */
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	934	}
				935
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	936	/// \brief Compares the lower double-precision floating-point values in each of
				937	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				938	/// the value in the first parameter is not greater than or equal to the
				939	/// corresponding value in the second parameter. The comparison yields 0h
				940	/// for false, FFFFFFFFFFFFFFFFh for true.
				941	///
				942	/// \headerfile <x86intrin.h>
				943	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	944	/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	945	///
				946	/// \param __a
				947	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	948	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	949	/// \param __b
				950	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	951	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	952	/// \returns A 128-bit vector. The lower 64 bits contain the comparison
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	953	/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	954	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	955	_mm_cmpnge_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	956	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	957	__m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); /* operands swapped: "a not greater-or-equal b" is evaluated as "b not less-or-equal a" */
Manman Ren	9bb34d6	2013-06-17 19:42:49 +0000	[diff] [blame]	958	return (__m128d) { __c[0], __a[1] }; /* take the upper element from __a rather than from the swapped-operand result */
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	959	}
				960
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	961	/// \brief Compares the lower double-precision floating-point values in each of
				962	/// the two 128-bit floating-point vectors of [2 x double] for equality. The
				963	/// comparison yields 0 for false, 1 for true.
				964	///
				965	/// \headerfile <x86intrin.h>
				966	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	967	/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	968	///
				969	/// \param __a
				970	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	971	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	972	/// \param __b
				973	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	974	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	975	/// \returns The comparison result as an integer: 0 for false, 1 for true.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	976	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	977	_mm_comieq_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	978	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	979	return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	980	}
				981
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	982	/// \brief Compares the lower double-precision floating-point values in each of
				983	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				984	/// the value in the first parameter is less than the corresponding value in
				985	/// the second parameter. The comparison yields 0 for false, 1 for true.
				986	///
				987	/// \headerfile <x86intrin.h>
				988	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	989	/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	990	///
				991	/// \param __a
				992	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	993	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	994	/// \param __b
				995	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	996	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	997	/// \returns The comparison result as an integer: 0 for false, 1 for true.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	998	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	999	_mm_comilt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1000	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1001	return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1002	}
				1003
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1004	/// \brief Compares the lower double-precision floating-point values in each of
				1005	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1006	/// the value in the first parameter is less than or equal to the
				1007	/// corresponding value in the second parameter. The comparison yields 0 for
				1008	/// false, 1 for true.
				1009	///
				1010	/// \headerfile <x86intrin.h>
				1011	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1012	/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1013	///
				1014	/// \param __a
				1015	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1016	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1017	/// \param __b
				1018	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1019	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1020	/// \returns The comparison result as an integer: 0 for false, 1 for true.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1021	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1022	_mm_comile_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1023	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1024	return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1025	}
				1026
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1027	/// \brief Compares the lower double-precision floating-point values in each of
				1028	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1029	/// the value in the first parameter is greater than the corresponding value
				1030	/// in the second parameter. The comparison yields 0 for false, 1 for true.
				1031	///
				1032	/// \headerfile <x86intrin.h>
				1033	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1034	/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1035	///
				1036	/// \param __a
				1037	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1038	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1039	/// \param __b
				1040	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1041	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1042	/// \returns The comparison result as an integer: 0 for false, 1 for true.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1043	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1044	_mm_comigt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1045	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1046	return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1047	}
				1048
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1049	/// \brief Compares the lower double-precision floating-point values in each of
				1050	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1051	/// the value in the first parameter is greater than or equal to the
				1052	/// corresponding value in the second parameter. The comparison yields 0 for
				1053	/// false, 1 for true.
				1054	///
				1055	/// \headerfile <x86intrin.h>
				1056	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1057	/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1058	///
				1059	/// \param __a
				1060	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1061	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1062	/// \param __b
				1063	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1064	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1065	/// \returns The comparison result as an integer: 0 for false, 1 for true.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1066	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1067	_mm_comige_sd(__m128d __a, __m128d __b)
Eli Friedman	89c1133	2011-10-06 20:31:50 +0000	[diff] [blame]	1068	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1069	return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
Eli Friedman	89c1133	2011-10-06 20:31:50 +0000	[diff] [blame]	1070	}
				1071
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1072	/// \brief Compares the lower double-precision floating-point values in each of
				1073	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1074	/// the value in the first parameter is unequal to the corresponding value in
				1075	/// the second parameter. The comparison yields 0 for false, 1 for true.
				1076	///
				1077	/// \headerfile <x86intrin.h>
				1078	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1079	/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1080	///
				1081	/// \param __a
				1082	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1083	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1084	/// \param __b
				1085	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1086	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1087	/// \returns The comparison result as an integer: 0 for false, 1 for true.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1088	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1089	_mm_comineq_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1090	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1091	return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1092	}
				1093
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1094	/// \brief Compares the lower double-precision floating-point values in each of
				1095	/// the two 128-bit floating-point vectors of [2 x double] for equality. The
				1096	/// comparison yields 0 for false, 1 for true. If either of the two lower
				1097	/// double-precision values is NaN, 0 is returned.
				1098	///
				1099	/// \headerfile <x86intrin.h>
				1100	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1101	/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1102	///
				1103	/// \param __a
				1104	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1105	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1106	/// \param __b
				1107	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1108	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1109	/// \returns An integer containing the comparison result. If either of the two
				1110	/// lower double-precision values is NaN, 0 is returned.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1111	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1112	_mm_ucomieq_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1113	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1114	return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1115	}
				1116
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1117	/// \brief Compares the lower double-precision floating-point values in each of
				1118	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1119	/// the value in the first parameter is less than the corresponding value in
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	1120	/// the second parameter. The comparison yields 0 for false, 1 for true. If
				1121	/// either of the two lower double-precision values is NaN, 0 is returned.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1122	///
				1123	/// \headerfile <x86intrin.h>
				1124	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1125	/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1126	///
				1127	/// \param __a
				1128	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1129	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1130	/// \param __b
				1131	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1132	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1133	/// \returns An integer containing the comparison result. If either of the two
				1134	/// lower double-precision values is NaN, 0 is returned.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1135	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1136	_mm_ucomilt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1137	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1138	return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1139	}
				1140
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1141	/// \brief Compares the lower double-precision floating-point values in each of
				1142	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1143	/// the value in the first parameter is less than or equal to the
				1144	/// corresponding value in the second parameter. The comparison yields 0 for
				1145	/// false, 1 for true. If either of the two lower double-precision values is
				1146	/// NaN, 0 is returned.
				1147	///
				1148	/// \headerfile <x86intrin.h>
				1149	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1150	/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1151	///
				1152	/// \param __a
				1153	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1154	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1155	/// \param __b
				1156	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1157	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1158	/// \returns An integer containing the comparison result. If either of the two
				1159	/// lower double-precision values is NaN, 0 is returned.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1160	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1161	_mm_ucomile_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1162	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1163	return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1164	}
				1165
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1166	/// \brief Compares the lower double-precision floating-point values in each of
				1167	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1168	/// the value in the first parameter is greater than the corresponding value
				1169	/// in the second parameter. The comparison yields 0 for false, 1 for true.
				1170	/// If either of the two lower double-precision values is NaN, 0 is returned.
				1171	///
				1172	/// \headerfile <x86intrin.h>
				1173	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1174	/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1175	///
				1176	/// \param __a
				1177	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1178	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1179	/// \param __b
				1180	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1181	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1182	/// \returns An integer containing the comparison result. If either of the two
				1183	/// lower double-precision values is NaN, 0 is returned.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1184	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1185	_mm_ucomigt_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1186	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1187	return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1188	}
				1189
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1190	/// \brief Compares the lower double-precision floating-point values in each of
				1191	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1192	/// the value in the first parameter is greater than or equal to the
				1193	/// corresponding value in the second parameter. The comparison yields 0 for
				1194	/// false, 1 for true. If either of the two lower double-precision values
				1195	/// is NaN, 0 is returned.
				1196	///
				1197	/// \headerfile <x86intrin.h>
				1198	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1199	/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1200	///
				1201	/// \param __a
				1202	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1203	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1204	/// \param __b
				1205	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1206	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	1207	/// \returns An integer containing the comparison result. If either of the two
				1208	/// lower double-precision values is NaN, 0 is returned.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1209	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1210	_mm_ucomige_sd(__m128d __a, __m128d __b)
Eli Friedman	f8cb480	2011-08-29 21:26:24 +0000	[diff] [blame]	1211	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1212	return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
Eli Friedman	f8cb480	2011-08-29 21:26:24 +0000	[diff] [blame]	1213	}
				1214
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1215	/// \brief Compares the lower double-precision floating-point values in each of
				1216	/// the two 128-bit floating-point vectors of [2 x double] to determine if
				1217	/// the value in the first parameter is unequal to the corresponding value in
				1218	/// the second parameter. The comparison yields 0 for false, 1 for true. If
				1219	/// either of the two lower double-precision values is NaN, 1 is returned.
				1220	///
				1221	/// \headerfile <x86intrin.h>
				1222	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1223	/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1224	///
				1225	/// \param __a
				1226	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1227	/// compared to the lower double-precision value of \a __b.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1228	/// \param __b
				1229	/// A 128-bit vector of [2 x double]. The lower double-precision value is
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1230	/// compared to the lower double-precision value of \a __a.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1231	/// \returns An integer containing the comparison result. If either of the two
				1232	/// lower double-precision values is NaN, 1 is returned.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1233	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1234	_mm_ucomineq_sd(__m128d __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1235	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1236	return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1237	}
				1238
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1239	/// \brief Converts the two double-precision floating-point elements of a
				1240	/// 128-bit vector of [2 x double] into two single-precision floating-point
				1241	/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].
				1242	/// The upper 64 bits of the result vector are set to zero.
				1243	///
				1244	/// \headerfile <x86intrin.h>
				1245	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1246	/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1247	///
				1248	/// \param __a
				1249	/// A 128-bit vector of [2 x double].
				1250	/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
				1251	/// converted values. The upper 64 bits are set to zero.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1252	static __inline__ __m128 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1253	_mm_cvtpd_ps(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1254	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1255	return __builtin_ia32_cvtpd2ps((__v2df)__a); /* NOTE(review): no explicit (__m128) cast on the result, unlike the (__m128d) casts in the cmp*_sd wrappers; presumably the builtin already yields a compatible vector type - confirm */
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1256	}
				1257
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1258	/// \brief Converts the lower two single-precision floating-point elements of a
				1259	/// 128-bit vector of [4 x float] into two double-precision floating-point
				1260	/// values, returned in a 128-bit vector of [2 x double]. The upper two
				1261	/// elements of the input vector are unused.
				1262	///
				1263	/// \headerfile <x86intrin.h>
				1264	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1265	/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1266	///
				1267	/// \param __a
				1268	/// A 128-bit vector of [4 x float]. The lower two single-precision
				1269	/// floating-point elements are converted to double-precision values. The
				1270	/// upper two elements are unused.
				1271	/// \returns A 128-bit vector of [2 x double] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1272	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1273	_mm_cvtps_pd(__m128 __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1274	{
Simon Pilgrim	90770c7	2016-05-23 22:13:02 +0000	[diff] [blame]	1275	return (__m128d) __builtin_convertvector(
				1276	__builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1277	}
				1278
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1279	/// \brief Converts the lower two integer elements of a 128-bit vector of
				1280	/// [4 x i32] into two double-precision floating-point values, returned in a
				1281	/// 128-bit vector of [2 x double]. The upper two elements of the input
				1282	/// vector are unused.
				1283	///
				1284	/// \headerfile <x86intrin.h>
				1285	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1286	/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1287	///
				1288	/// \param __a
				1289	/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are
				1290	/// converted to double-precision values. The upper two elements are unused.
				1291	/// \returns A 128-bit vector of [2 x double] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1292	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1293	_mm_cvtepi32_pd(__m128i __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1294	{
Simon Pilgrim	90770c7	2016-05-23 22:13:02 +0000	[diff] [blame]	1295	return (__m128d) __builtin_convertvector(
				1296	__builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1297	}
				1298
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1299	/// \brief Converts the two double-precision floating-point elements of a
				1300	/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
				1301	/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper
				1302	/// 64 bits of the result vector are set to zero.
				1303	///
				1304	/// \headerfile <x86intrin.h>
				1305	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1306	/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1307	///
				1308	/// \param __a
				1309	/// A 128-bit vector of [2 x double].
				1310	/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
				1311	/// converted values. The upper 64 bits are set to zero.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1312	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1313	_mm_cvtpd_epi32(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1314	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1315	return __builtin_ia32_cvtpd2dq((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1316	}
				1317
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1318	/// \brief Converts the low-order element of a 128-bit vector of [2 x double]
				1319	/// into a 32-bit signed integer value.
				1320	///
				1321	/// \headerfile <x86intrin.h>
				1322	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1323	/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1324	///
				1325	/// \param __a
				1326	/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
				1327	/// conversion.
				1328	/// \returns A 32-bit signed integer containing the converted value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1329	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1330	_mm_cvtsd_si32(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1331	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1332	return __builtin_ia32_cvtsd2si((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1333	}
				1334
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1335	/// \brief Converts the lower double-precision floating-point element of a
				1336	/// 128-bit vector of [2 x double], in the second parameter, into a
				1337	/// single-precision floating-point value, returned in the lower 32 bits of a
				1338	/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are
				1339	/// copied from the upper 96 bits of the first parameter.
				1340	///
				1341	/// \headerfile <x86intrin.h>
				1342	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1343	/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1344	///
				1345	/// \param __a
				1346	/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are
				1347	/// copied to the upper 96 bits of the result.
				1348	/// \param __b
				1349	/// A 128-bit vector of [2 x double]. The lower double-precision
				1350	/// floating-point element is used in the conversion.
				1351	/// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
				1352	/// converted value from the second parameter. The upper 96 bits are copied
				1353	/// from the upper 96 bits of the first parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1354	static __inline__ __m128 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1355	_mm_cvtsd_ss(__m128 __a, __m128d __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1356	{
Simon Pilgrim	e3b9ee0	2016-07-20 10:18:01 +0000	[diff] [blame]	1357	return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1358	}
				1359
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1360	/// \brief Converts a 32-bit signed integer value, in the second parameter, into
				1361	/// a double-precision floating-point value, returned in the lower 64 bits of
				1362	/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
				1363	/// are copied from the upper 64 bits of the first parameter.
				1364	///
				1365	/// \headerfile <x86intrin.h>
				1366	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1367	/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1368	///
				1369	/// \param __a
				1370	/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
				1371	/// copied to the upper 64 bits of the result.
				1372	/// \param __b
				1373	/// A 32-bit signed integer containing the value to be converted.
				1374	/// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
				1375	/// converted value from the second parameter. The upper 64 bits are copied
				1376	/// from the upper 64 bits of the first parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1377	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1378	_mm_cvtsi32_sd(__m128d __a, int __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1379	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1380	__a[0] = __b;
				1381	return __a;
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1382	}
				1383
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1384	/// \brief Converts the lower single-precision floating-point element of a
				1385	/// 128-bit vector of [4 x float], in the second parameter, into a
				1386	/// double-precision floating-point value, returned in the lower 64 bits of
				1387	/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
				1388	/// are copied from the upper 64 bits of the first parameter.
				1389	///
				1390	/// \headerfile <x86intrin.h>
				1391	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1392	/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1393	///
				1394	/// \param __a
				1395	/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
				1396	/// copied to the upper 64 bits of the result.
				1397	/// \param __b
				1398	/// A 128-bit vector of [4 x float]. The lower single-precision
				1399	/// floating-point element is used in the conversion.
				1400	/// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
				1401	/// converted value from the second parameter. The upper 64 bits are copied
				1402	/// from the upper 64 bits of the first parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1403	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1404	_mm_cvtss_sd(__m128d __a, __m128 __b)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1405	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1406	__a[0] = __b[0];
				1407	return __a;
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1408	}
				1409
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1410	/// \brief Converts the two double-precision floating-point elements of a
				1411	/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
				1412	/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. If the
				1413	/// result of either conversion is inexact, the result is truncated (rounded
				1414	/// towards zero) regardless of the current MXCSR setting. The upper 64 bits
				1415	/// of the result vector are set to zero.
				1416	///
				1417	/// \headerfile <x86intrin.h>
				1418	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	1419	/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>
				1420	/// instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1421	///
				1422	/// \param __a
				1423	/// A 128-bit vector of [2 x double].
				1424	/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
				1425	/// converted values. The upper 64 bits are set to zero.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1426	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1427	_mm_cvttpd_epi32(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1428	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1429	return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1430	}
				1431
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1432	/// \brief Converts the low-order element of a [2 x double] vector into a 32-bit
				1433	/// signed integer value, truncating the result when it is inexact.
				1434	///
				1435	/// \headerfile <x86intrin.h>
				1436	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	1437	/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
				1438	/// instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1439	///
				1440	/// \param __a
				1441	/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
				1442	/// conversion.
				1443	/// \returns A 32-bit signed integer containing the converted value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1444	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1445	_mm_cvttsd_si32(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1446	{
Simon Pilgrim	e3b9ee0	2016-07-20 10:18:01 +0000	[diff] [blame]	1447	return __builtin_ia32_cvttsd2si((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1448	}
				1449
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1450	/// \brief Converts the two double-precision floating-point elements of a
				1451	/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
				1452	/// returned in a 64-bit vector of [2 x i32].
				1453	///
				1454	/// \headerfile <x86intrin.h>
				1455	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1456	/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1457	///
				1458	/// \param __a
				1459	/// A 128-bit vector of [2 x double].
				1460	/// \returns A 64-bit vector of [2 x i32] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1461	static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1462	_mm_cvtpd_pi32(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1463	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1464	return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1465	}
				1466
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1467	/// \brief Converts the two double-precision floating-point elements of a
				1468	/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
				1469	/// returned in a 64-bit vector of [2 x i32]. If the result of either
				1470	/// conversion is inexact, the result is truncated (rounded towards zero)
				1471	/// regardless of the current MXCSR setting.
				1472	///
				1473	/// \headerfile <x86intrin.h>
				1474	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1475	/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1476	///
				1477	/// \param __a
				1478	/// A 128-bit vector of [2 x double].
				1479	/// \returns A 64-bit vector of [2 x i32] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1480	static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1481	_mm_cvttpd_pi32(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1482	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1483	return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1484	}
				1485
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1486	/// \brief Converts the two signed 32-bit integer elements of a 64-bit vector of
				1487	/// [2 x i32] into two double-precision floating-point values, returned in a
				1488	/// 128-bit vector of [2 x double].
				1489	///
				1490	/// \headerfile <x86intrin.h>
				1491	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1492	/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1493	///
				1494	/// \param __a
				1495	/// A 64-bit vector of [2 x i32].
				1496	/// \returns A 128-bit vector of [2 x double] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1497	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1498	_mm_cvtpi32_pd(__m64 __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1499	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1500	return __builtin_ia32_cvtpi2pd((__v2si)__a);
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1501	}
				1502
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1503	/// \brief Returns the low-order element of a 128-bit vector of [2 x double] as
				1504	/// a double-precision floating-point value.
				1505	///
				1506	/// \headerfile <x86intrin.h>
				1507	///
				1508	/// This intrinsic has no corresponding instruction.
				1509	///
				1510	/// \param __a
				1511	/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.
				1512	/// \returns A double-precision floating-point value copied from the lower 64
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	1513	/// bits of \a __a.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1514	static __inline__ double __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1515	_mm_cvtsd_f64(__m128d __a)
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1516	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1517	return __a[0];
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	1518	}
				1519
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1520	/// \brief Loads a 128-bit floating-point vector of [2 x double] from an aligned
				1521	/// memory location.
				1522	///
				1523	/// \headerfile <x86intrin.h>
				1524	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1525	/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1526	///
				1527	/// \param __dp
				1528	/// A pointer to a 128-bit memory location. The address of the memory
				1529	/// location has to be 16-byte aligned.
				1530	/// \returns A 128-bit vector of [2 x double] containing the loaded values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1531	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1532	_mm_load_pd(double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1533	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1534	return (__m128d)__dp;
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1535	}
				1536
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1537	/// \brief Loads a double-precision floating-point value from a specified memory
				1538	/// location and duplicates it to both vector elements of a 128-bit vector of
				1539	/// [2 x double].
				1540	///
				1541	/// \headerfile <x86intrin.h>
				1542	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1543	/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1544	///
				1545	/// \param __dp
				1546	/// A pointer to a memory location containing a double-precision value.
				1547	/// \returns A 128-bit vector of [2 x double] containing the loaded and
				1548	/// duplicated values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1549	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1550	_mm_load1_pd(double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1551	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1552	struct __mm_load1_pd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1553	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1554	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1555	double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
				1556	return (__m128d){ __u, __u };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1557	}
				1558
/* Alternate spelling of _mm_load1_pd. */
#define _mm_load_pd1(__dp) _mm_load1_pd(__dp)
				1560
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1561	/// \brief Loads two double-precision values, in reverse order, from an aligned
				1562	/// memory location into a 128-bit vector of [2 x double].
				1563	///
				1564	/// \headerfile <x86intrin.h>
				1565	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	1566	/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
				1567	/// needed shuffling instructions. In AVX mode, the shuffling may be combined
				1568	/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1569	///
				1570	/// \param __dp
				1571	/// A 16-byte aligned pointer to an array of double-precision values to be
				1572	/// loaded in reverse order.
				1573	/// \returns A 128-bit vector of [2 x double] containing the reversed loaded
				1574	/// values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1575	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1576	_mm_loadr_pd(double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1577	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1578	__m128d __u = (__m128d)__dp;
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1579	return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1580	}
				1581
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1582	/// \brief Loads a 128-bit floating-point vector of [2 x double] from an
				1583	/// unaligned memory location.
				1584	///
				1585	/// \headerfile <x86intrin.h>
				1586	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1587	/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1588	///
				1589	/// \param __dp
				1590	/// A pointer to a 128-bit memory location. The address of the memory
				1591	/// location does not have to be aligned.
				1592	/// \returns A 128-bit vector of [2 x double] containing the loaded values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1593	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1594	_mm_loadu_pd(double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1595	{
Bill Wendling	502931f	2011-05-13 00:11:39 +0000	[diff] [blame]	1596	struct __loadu_pd {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1597	__m128d __v;
David Majnemer	1cf22e6	2015-02-04 00:26:10 +0000	[diff] [blame]	1598	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1599	return ((struct __loadu_pd*)__dp)->__v;
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1600	}
				1601
Ekaterina Romanova	2e041c9	2017-01-13 01:14:08 +0000	[diff] [blame^]	1602	/// \brief Loads a 64-bit integer value to the low element of a 128-bit integer
				1603	/// vector and clears the upper element.
				1604	///
				1605	/// \headerfile <x86intrin.h>
				1606	///
				1607	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
				1608	///
				1609	/// \param __dp
				1610	/// A pointer to a 64-bit memory location. The address of the memory
				1611	/// location does not have to be aligned.
				1612	/// \returns A 128-bit vector of [2 x i64] containing the loaded value.
Asaf Badouh	57819aa	2016-06-26 13:51:54 +0000	[diff] [blame]	1613	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1614	_mm_loadu_si64(void const *__a)
				1615	{
				1616	struct __loadu_si64 {
				1617	long long __v;
				1618	} __attribute__((__packed__, __may_alias__));
				1619	long long __u = ((struct __loadu_si64*)__a)->__v;
				1620	return (__m128i){__u, 0L};
				1621	}
				1622
Ekaterina Romanova	2e041c9	2017-01-13 01:14:08 +0000	[diff] [blame^]	1623	/// \brief Loads a 64-bit double-precision value to the low element of a
				1624	/// 128-bit integer vector and clears the upper element.
				1625	///
				1626	/// \headerfile <x86intrin.h>
				1627	///
				1628	/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
				1629	///
				1630	/// \param __dp
				1631	/// An pointer to a memory location containing a double-precision value.
				1632	/// The address of the memory location does not have to be aligned.
				1633	/// \returns A 128-bit vector of [2 x double] containing the loaded value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1634	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1635	_mm_load_sd(double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1636	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1637	struct __mm_load_sd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1638	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1639	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1640	double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
				1641	return (__m128d){ __u, 0 };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1642	}
				1643
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1644	/// \brief Loads a double-precision value into the high-order bits of a 128-bit
				1645	/// vector of [2 x double]. The low-order bits are copied from the low-order
				1646	/// bits of the first operand.
				1647	///
				1648	/// \headerfile <x86intrin.h>
				1649	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1650	/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1651	///
				1652	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	1653	/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1654	/// Bits [63:0] are written to bits [63:0] of the result.
				1655	/// \param __dp
				1656	/// A pointer to a 64-bit memory location containing a double-precision
				1657	/// floating-point value that is loaded. The loaded value is written to bits
				1658	/// [127:64] of the result. The address of the memory location does not have
				1659	/// to be aligned.
				1660	/// \returns A 128-bit vector of [2 x double] containing the moved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1661	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1662	_mm_loadh_pd(__m128d __a, double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1663	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1664	struct __mm_loadh_pd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1665	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1666	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1667	double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
				1668	return (__m128d){ __a[0], __u };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1669	}
				1670
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1671	/// \brief Loads a double-precision value into the low-order bits of a 128-bit
				1672	/// vector of [2 x double]. The high-order bits are copied from the
				1673	/// high-order bits of the first operand.
				1674	///
				1675	/// \headerfile <x86intrin.h>
				1676	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1677	/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1678	///
				1679	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	1680	/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1681	/// Bits [127:64] are written to bits [127:64] of the result.
				1682	/// \param __dp
				1683	/// A pointer to a 64-bit memory location containing a double-precision
				1684	/// floating-point value that is loaded. The loaded value is written to bits
				1685	/// [63:0] of the result. The address of the memory location does not have to
				1686	/// be aligned.
				1687	/// \returns A 128-bit vector of [2 x double] containing the moved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1688	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1689	_mm_loadl_pd(__m128d __a, double const *__dp)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1690	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1691	struct __mm_loadl_pd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1692	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1693	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1694	double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
				1695	return (__m128d){ __u, __a[1] };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1696	}
				1697
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1698	/// \brief Constructs a 128-bit floating-point vector of [2 x double] with
				1699	/// unspecified content. This could be used as an argument to another
				1700	/// intrinsic function where the argument is required but the value is not
				1701	/// actually used.
				1702	///
				1703	/// \headerfile <x86intrin.h>
				1704	///
				1705	/// This intrinsic has no corresponding instruction.
				1706	///
				1707	/// \returns A 128-bit floating-point vector of [2 x double] with unspecified
				1708	/// content.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1709	static __inline__ __m128d __DEFAULT_FN_ATTRS
Craig Topper	3a0c726	2016-06-09 05:14:28 +0000	[diff] [blame]	1710	_mm_undefined_pd(void)
Simon Pilgrim	5aba992	2015-08-26 21:17:12 +0000	[diff] [blame]	1711	{
				1712	return (__m128d)__builtin_ia32_undef128();
				1713	}
				1714
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1715	/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower
				1716	/// 64 bits of the vector are initialized with the specified double-precision
				1717	/// floating-point value. The upper 64 bits are set to zero.
				1718	///
				1719	/// \headerfile <x86intrin.h>
				1720	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1721	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1722	///
				1723	/// \param __w
				1724	/// A double-precision floating-point value used to initialize the lower 64
				1725	/// bits of the result.
				1726	/// \returns An initialized 128-bit floating-point vector of [2 x double]. The
				1727	/// lower 64 bits contain the value of the parameter. The upper 64 bits are
				1728	/// set to zero.
Simon Pilgrim	5aba992	2015-08-26 21:17:12 +0000	[diff] [blame]	1729	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1730	_mm_set_sd(double __w)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1731	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1732	return (__m128d){ __w, 0 };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1733	}
				1734
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1735	/// \brief Constructs a 128-bit floating-point vector of [2 x double], with each
				1736	/// of the two double-precision floating-point vector elements set to the
				1737	/// specified double-precision floating-point value.
				1738	///
				1739	/// \headerfile <x86intrin.h>
				1740	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1741	/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1742	///
				1743	/// \param __w
				1744	/// A double-precision floating-point value used to initialize each vector
				1745	/// element of the result.
				1746	/// \returns An initialized 128-bit floating-point vector of [2 x double].
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1747	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1748	_mm_set1_pd(double __w)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1749	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1750	return (__m128d){ __w, __w };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1751	}
				1752
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1753	/// \brief Constructs a 128-bit floating-point vector of [2 x double]
				1754	/// initialized with the specified double-precision floating-point values.
				1755	///
				1756	/// \headerfile <x86intrin.h>
				1757	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1758	/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1759	///
				1760	/// \param __w
				1761	/// A double-precision floating-point value used to initialize the upper 64
				1762	/// bits of the result.
				1763	/// \param __x
				1764	/// A double-precision floating-point value used to initialize the lower 64
				1765	/// bits of the result.
				1766	/// \returns An initialized 128-bit floating-point vector of [2 x double].
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1767	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1768	_mm_set_pd(double __w, double __x)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1769	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1770	return (__m128d){ __x, __w };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1771	}
				1772
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1773	/// \brief Constructs a 128-bit floating-point vector of [2 x double],
				1774	/// initialized in reverse order with the specified double-precision
				1775	/// floating-point values.
				1776	///
				1777	/// \headerfile <x86intrin.h>
				1778	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1779	/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1780	///
				1781	/// \param __w
				1782	/// A double-precision floating-point value used to initialize the lower 64
				1783	/// bits of the result.
				1784	/// \param __x
				1785	/// A double-precision floating-point value used to initialize the upper 64
				1786	/// bits of the result.
				1787	/// \returns An initialized 128-bit floating-point vector of [2 x double].
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1788	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1789	_mm_setr_pd(double __w, double __x)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1790	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1791	return (__m128d){ __w, __x };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1792	}
				1793
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1794	/// \brief Constructs a 128-bit floating-point vector of [2 x double]
				1795	/// initialized to zero.
				1796	///
				1797	/// \headerfile <x86intrin.h>
				1798	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1799	/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1800	///
				1801	/// \returns An initialized 128-bit floating-point vector of [2 x double] with
				1802	/// all elements set to zero.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1803	static __inline__ __m128d __DEFAULT_FN_ATTRS
Mike Stump	5b31ed3	2009-02-13 14:24:50 +0000	[diff] [blame]	1804	_mm_setzero_pd(void)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1805	{
				1806	return (__m128d){ 0, 0 };
				1807	}
				1808
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1809	/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower
				1810	/// 64 bits are set to the lower 64 bits of the second parameter. The upper
				1811	/// 64 bits are set to the upper 64 bits of the first parameter.
				1812	//
				1813	/// \headerfile <x86intrin.h>
				1814	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1815	/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1816	///
				1817	/// \param __a
				1818	/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the
				1819	/// upper 64 bits of the result.
				1820	/// \param __b
				1821	/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the
				1822	/// lower 64 bits of the result.
				1823	/// \returns A 128-bit vector of [2 x double] containing the moved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1824	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1825	_mm_move_sd(__m128d __a, __m128d __b)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1826	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1827	return (__m128d){ __b[0], __a[1] };
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1828	}
				1829
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1830	/// \brief Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
				1831	/// memory location.
				1832	///
				1833	/// \headerfile <x86intrin.h>
				1834	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1835	/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1836	///
				1837	/// \param __dp
				1838	/// A pointer to a 64-bit memory location.
				1839	/// \param __a
				1840	/// A 128-bit vector of [2 x double] containing the value to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1841	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1842	_mm_store_sd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1843	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1844	struct __mm_store_sd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1845	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1846	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1847	((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1848	}
				1849
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1850	static __inline__ void __DEFAULT_FN_ATTRS
Simon Pilgrim	645e1ad	2016-05-30 17:55:25 +0000	[diff] [blame]	1851	_mm_store_pd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1852	{
Simon Pilgrim	645e1ad	2016-05-30 17:55:25 +0000	[diff] [blame]	1853	(__m128d)__dp = __a;
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1854	}
				1855
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1856	static __inline__ void __DEFAULT_FN_ATTRS
Simon Pilgrim	645e1ad	2016-05-30 17:55:25 +0000	[diff] [blame]	1857	_mm_store1_pd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1858	{
Simon Pilgrim	645e1ad	2016-05-30 17:55:25 +0000	[diff] [blame]	1859	__a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
				1860	_mm_store_pd(__dp, __a);
				1861	}
				1862
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1863	/// \brief Stores a 128-bit vector of [2 x double] into an aligned memory
				1864	/// location.
				1865	///
				1866	/// \headerfile <x86intrin.h>
				1867	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1868	/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1869	///
				1870	/// \param __dp
				1871	/// A pointer to a 128-bit memory location. The address of the memory
				1872	/// location has to be 16-byte aligned.
				1873	/// \param __a
				1874	/// A 128-bit vector of [2 x double] containing the values to be stored.
Simon Pilgrim	645e1ad	2016-05-30 17:55:25 +0000	[diff] [blame]	1875	static __inline__ void __DEFAULT_FN_ATTRS
				1876	_mm_store_pd1(double *__dp, __m128d __a)
				1877	{
				1878	return _mm_store1_pd(__dp, __a);
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1879	}
				1880
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1881	/// \brief Stores a 128-bit vector of [2 x double] into an unaligned memory
				1882	/// location.
				1883	///
				1884	/// \headerfile <x86intrin.h>
				1885	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1886	/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1887	///
				1888	/// \param __dp
				1889	/// A pointer to a 128-bit memory location. The address of the memory
				1890	/// location does not have to be aligned.
				1891	/// \param __a
				1892	/// A 128-bit vector of [2 x double] containing the values to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1893	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1894	_mm_storeu_pd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1895	{
Craig Topper	09175da	2016-05-30 17:10:30 +0000	[diff] [blame]	1896	struct __storeu_pd {
				1897	__m128d __v;
				1898	} __attribute__((__packed__, __may_alias__));
				1899	((struct __storeu_pd*)__dp)->__v = __a;
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1900	}
				1901
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1902	/// \brief Stores two double-precision values, in reverse order, from a 128-bit
				1903	/// vector of [2 x double] to a 16-byte aligned memory location.
				1904	///
				1905	/// \headerfile <x86intrin.h>
				1906	///
				1907	/// This intrinsic corresponds to a shuffling instruction followed by a
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1908	/// <c> VMOVAPD / MOVAPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1909	///
				1910	/// \param __dp
				1911	/// A pointer to a 16-byte aligned memory location that can store two
				1912	/// double-precision values.
				1913	/// \param __a
				1914	/// A 128-bit vector of [2 x double] containing the values to be reversed and
				1915	/// stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1916	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1917	_mm_storer_pd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1918	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	1919	__a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1920	(__m128d )__dp = __a;
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1921	}
				1922
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1923	/// \brief Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
				1924	/// memory location.
				1925	///
				1926	/// \headerfile <x86intrin.h>
				1927	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1928	/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1929	///
				1930	/// \param __dp
				1931	/// A pointer to a 64-bit memory location.
				1932	/// \param __a
				1933	/// A 128-bit vector of [2 x double] containing the value to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1934	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1935	_mm_storeh_pd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1936	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1937	struct __mm_storeh_pd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1938	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1939	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1940	((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1941	}
				1942
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1943	/// \brief Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
				1944	/// memory location.
				1945	///
				1946	/// \headerfile <x86intrin.h>
				1947	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1948	/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1949	///
				1950	/// \param __dp
				1951	/// A pointer to a 64-bit memory location.
				1952	/// \param __a
				1953	/// A 128-bit vector of [2 x double] containing the value to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1954	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1955	_mm_storel_pd(double *__dp, __m128d __a)
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1956	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1957	struct __mm_storeh_pd_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1958	double __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	1959	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1960	((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
Anders Carlsson	b08ac0b	2008-12-24 02:11:54 +0000	[diff] [blame]	1961	}
				1962
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1963	/// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8],
				1964	/// saving the lower 8 bits of each sum in the corresponding element of a
				1965	/// 128-bit result vector of [16 x i8]. The integer elements of both
				1966	/// parameters can be either signed or unsigned.
				1967	///
				1968	/// \headerfile <x86intrin.h>
				1969	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1970	/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1971	///
				1972	/// \param __a
				1973	/// A 128-bit vector of [16 x i8].
				1974	/// \param __b
				1975	/// A 128-bit vector of [16 x i8].
				1976	/// \returns A 128-bit vector of [16 x i8] containing the sums of both
				1977	/// parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1978	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	1979	_mm_add_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	1980	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	1981	return (__m128i)((__v16qu)__a + (__v16qu)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	1982	}
				1983
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1984	/// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16],
				1985	/// saving the lower 16 bits of each sum in the corresponding element of a
				1986	/// 128-bit result vector of [8 x i16]. The integer elements of both
				1987	/// parameters can be either signed or unsigned.
				1988	///
				1989	/// \headerfile <x86intrin.h>
				1990	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	1991	/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	1992	///
				1993	/// \param __a
				1994	/// A 128-bit vector of [8 x i16].
				1995	/// \param __b
				1996	/// A 128-bit vector of [8 x i16].
				1997	/// \returns A 128-bit vector of [8 x i16] containing the sums of both
				1998	/// parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	1999	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2000	_mm_add_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2001	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2002	return (__m128i)((__v8hu)__a + (__v8hu)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2003	}
				2004
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2005	/// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32],
				2006	/// saving the lower 32 bits of each sum in the corresponding element of a
				2007	/// 128-bit result vector of [4 x i32]. The integer elements of both
				2008	/// parameters can be either signed or unsigned.
				2009	///
				2010	/// \headerfile <x86intrin.h>
				2011	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2012	/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2013	///
				2014	/// \param __a
				2015	/// A 128-bit vector of [4 x i32].
				2016	/// \param __b
				2017	/// A 128-bit vector of [4 x i32].
				2018	/// \returns A 128-bit vector of [4 x i32] containing the sums of both
				2019	/// parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2020	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2021	_mm_add_epi32(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2022	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2023	return (__m128i)((__v4su)__a + (__v4su)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2024	}
				2025
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2026	/// \brief Adds two signed or unsigned 64-bit integer values, returning the
				2027	/// lower 64 bits of the sum.
				2028	///
				2029	/// \headerfile <x86intrin.h>
				2030	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2031	/// This intrinsic corresponds to the <c> PADDQ </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2032	///
				2033	/// \param __a
				2034	/// A 64-bit integer.
				2035	/// \param __b
				2036	/// A 64-bit integer.
				2037	/// \returns A 64-bit integer containing the sum of both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2038	static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2039	_mm_add_si64(__m64 __a, __m64 __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2040	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	2041	return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2042	}
				2043
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2044	/// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64],
				2045	/// saving the lower 64 bits of each sum in the corresponding element of a
				2046	/// 128-bit result vector of [2 x i64]. The integer elements of both
				2047	/// parameters can be either signed or unsigned.
				2048	///
				2049	/// \headerfile <x86intrin.h>
				2050	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2051	/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2052	///
				2053	/// \param __a
				2054	/// A 128-bit vector of [2 x i64].
				2055	/// \param __b
				2056	/// A 128-bit vector of [2 x i64].
				2057	/// \returns A 128-bit vector of [2 x i64] containing the sums of both
				2058	/// parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2059	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2060	_mm_add_epi64(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2061	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2062	return (__m128i)((__v2du)__a + (__v2du)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2063	}
				2064
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2065	/// \brief Adds, with saturation, the corresponding elements of two 128-bit
				2066	/// signed [16 x i8] vectors, saving each sum in the corresponding element of
				2067	/// a 128-bit result vector of [16 x i8]. Positive sums greater than 7Fh are
				2068	/// saturated to 7Fh. Negative sums less than 80h are saturated to 80h.
				2069	///
				2070	/// \headerfile <x86intrin.h>
				2071	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2072	/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2073	///
				2074	/// \param __a
				2075	/// A 128-bit signed [16 x i8] vector.
				2076	/// \param __b
				2077	/// A 128-bit signed [16 x i8] vector.
				2078	/// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
				2079	/// both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2080	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2081	_mm_adds_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2082	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2083	return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2084	}
				2085
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2086	/// \brief Adds, with saturation, the corresponding elements of two 128-bit
				2087	/// signed [8 x i16] vectors, saving each sum in the corresponding element of
				2088	/// a 128-bit result vector of [8 x i16]. Positive sums greater than 7FFFh
				2089	/// are saturated to 7FFFh. Negative sums less than 8000h are saturated to
				2090	/// 8000h.
				2091	///
				2092	/// \headerfile <x86intrin.h>
				2093	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2094	/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2095	///
				2096	/// \param __a
				2097	/// A 128-bit signed [8 x i16] vector.
				2098	/// \param __b
				2099	/// A 128-bit signed [8 x i16] vector.
				2100	/// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
				2101	/// both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2102	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2103	_mm_adds_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2104	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2105	return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2106	}
				2107
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2108	/// \brief Adds, with saturation, the corresponding elements of two 128-bit
				2109	/// unsigned [16 x i8] vectors, saving each sum in the corresponding element
				2110	/// of a 128-bit result vector of [16 x i8]. Positive sums greater than FFh
				2111	/// are saturated to FFh. Negative sums are saturated to 00h.
				2112	///
				2113	/// \headerfile <x86intrin.h>
				2114	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2115	/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2116	///
				2117	/// \param __a
				2118	/// A 128-bit unsigned [16 x i8] vector.
				2119	/// \param __b
				2120	/// A 128-bit unsigned [16 x i8] vector.
				2121	/// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
				2122	/// of both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2123	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2124	_mm_adds_epu8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2125	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2126	return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2127	}
				2128
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2129	/// \brief Adds, with saturation, the corresponding elements of two 128-bit
				2130	/// unsigned [8 x i16] vectors, saving each sum in the corresponding element
				2131	/// of a 128-bit result vector of [8 x i16]. Positive sums greater than FFFFh
				2132	/// are saturated to FFFFh. Negative sums are saturated to 0000h.
				2133	///
				2134	/// \headerfile <x86intrin.h>
				2135	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2136	/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2137	///
				2138	/// \param __a
				2139	/// A 128-bit unsigned [8 x i16] vector.
				2140	/// \param __b
				2141	/// A 128-bit unsigned [8 x i16] vector.
				2142	/// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
				2143	/// of both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2144	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2145	_mm_adds_epu16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2146	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2147	return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2148	}
				2149
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2150	/// \brief Computes the rounded avarages of corresponding elements of two
				2151	/// 128-bit unsigned [16 x i8] vectors, saving each result in the
				2152	/// corresponding element of a 128-bit result vector of [16 x i8].
				2153	///
				2154	/// \headerfile <x86intrin.h>
				2155	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2156	/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2157	///
				2158	/// \param __a
				2159	/// A 128-bit unsigned [16 x i8] vector.
				2160	/// \param __b
				2161	/// A 128-bit unsigned [16 x i8] vector.
				2162	/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
				2163	/// averages of both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2164	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2165	_mm_avg_epu8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2166	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2167	return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2168	}
				2169
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2170	/// \brief Computes the rounded avarages of corresponding elements of two
				2171	/// 128-bit unsigned [8 x i16] vectors, saving each result in the
				2172	/// corresponding element of a 128-bit result vector of [8 x i16].
				2173	///
				2174	/// \headerfile <x86intrin.h>
				2175	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2176	/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2177	///
				2178	/// \param __a
				2179	/// A 128-bit unsigned [8 x i16] vector.
				2180	/// \param __b
				2181	/// A 128-bit unsigned [8 x i16] vector.
				2182	/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
				2183	/// averages of both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2184	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2185	_mm_avg_epu16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2186	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2187	return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2188	}
				2189
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2190	/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
				2191	/// vectors, producing eight intermediate 32-bit signed integer products, and
				2192	/// adds the consecutive pairs of 32-bit products to form a 128-bit signed
				2193	/// [4 x i32] vector. For example, bits [15:0] of both parameters are
				2194	/// multiplied producing a 32-bit product, bits [31:16] of both parameters
				2195	/// are multiplied producing a 32-bit product, and the sum of those two
				2196	/// products becomes bits [31:0] of the result.
				2197	///
				2198	/// \headerfile <x86intrin.h>
				2199	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2200	/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2201	///
				2202	/// \param __a
				2203	/// A 128-bit signed [8 x i16] vector.
				2204	/// \param __b
				2205	/// A 128-bit signed [8 x i16] vector.
				2206	/// \returns A 128-bit signed [4 x i32] vector containing the sums of products
				2207	/// of both parameters.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2208	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2209	_mm_madd_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2210	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2211	return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2212	}
				2213
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2214	/// \brief Compares corresponding elements of two 128-bit signed [8 x i16]
				2215	/// vectors, saving the greater value from each comparison in the
				2216	/// corresponding element of a 128-bit result vector of [8 x i16].
				2217	///
				2218	/// \headerfile <x86intrin.h>
				2219	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2220	/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2221	///
				2222	/// \param __a
				2223	/// A 128-bit signed [8 x i16] vector.
				2224	/// \param __b
				2225	/// A 128-bit signed [8 x i16] vector.
				2226	/// \returns A 128-bit signed [8 x i16] vector containing the greater value of
				2227	/// each comparison.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2228	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2229	_mm_max_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2230	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2231	return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2232	}
				2233
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2234	/// \brief Compares corresponding elements of two 128-bit unsigned [16 x i8]
				2235	/// vectors, saving the greater value from each comparison in the
				2236	/// corresponding element of a 128-bit result vector of [16 x i8].
				2237	///
				2238	/// \headerfile <x86intrin.h>
				2239	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2240	/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2241	///
				2242	/// \param __a
				2243	/// A 128-bit unsigned [16 x i8] vector.
				2244	/// \param __b
				2245	/// A 128-bit unsigned [16 x i8] vector.
				2246	/// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
				2247	/// each comparison.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2248	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2249	_mm_max_epu8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2250	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2251	return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2252	}
				2253
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2254	/// \brief Compares corresponding elements of two 128-bit signed [8 x i16]
				2255	/// vectors, saving the smaller value from each comparison in the
				2256	/// corresponding element of a 128-bit result vector of [8 x i16].
				2257	///
				2258	/// \headerfile <x86intrin.h>
				2259	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2260	/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2261	///
				2262	/// \param __a
				2263	/// A 128-bit signed [8 x i16] vector.
				2264	/// \param __b
				2265	/// A 128-bit signed [8 x i16] vector.
				2266	/// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
				2267	/// each comparison.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2268	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2269	_mm_min_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2270	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2271	return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2272	}
				2273
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2274	/// \brief Compares corresponding elements of two 128-bit unsigned [16 x i8]
				2275	/// vectors, saving the smaller value from each comparison in the
				2276	/// corresponding element of a 128-bit result vector of [16 x i8].
				2277	///
				2278	/// \headerfile <x86intrin.h>
				2279	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2280	/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2281	///
				2282	/// \param __a
				2283	/// A 128-bit unsigned [16 x i8] vector.
				2284	/// \param __b
				2285	/// A 128-bit unsigned [16 x i8] vector.
				2286	/// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
				2287	/// each comparison.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2288	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2289	_mm_min_epu8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2290	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2291	return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2292	}
				2293
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2294	/// \brief Multiplies the corresponding elements of two signed [8 x i16]
				2295	/// vectors, saving the upper 16 bits of each 32-bit product in the
				2296	/// corresponding element of a 128-bit signed [8 x i16] result vector.
				2297	///
				2298	/// \headerfile <x86intrin.h>
				2299	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2300	/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2301	///
				2302	/// \param __a
				2303	/// A 128-bit signed [8 x i16] vector.
				2304	/// \param __b
				2305	/// A 128-bit signed [8 x i16] vector.
				2306	/// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
				2307	/// each of the eight 32-bit products.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2308	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2309	_mm_mulhi_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2310	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2311	return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2312	}
				2313
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2314	/// \brief Multiplies the corresponding elements of two unsigned [8 x i16]
				2315	/// vectors, saving the upper 16 bits of each 32-bit product in the
				2316	/// corresponding element of a 128-bit unsigned [8 x i16] result vector.
				2317	///
				2318	/// \headerfile <x86intrin.h>
				2319	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2320	/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2321	///
				2322	/// \param __a
				2323	/// A 128-bit unsigned [8 x i16] vector.
				2324	/// \param __b
				2325	/// A 128-bit unsigned [8 x i16] vector.
				2326	/// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
				2327	/// of each of the eight 32-bit products.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2328	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2329	_mm_mulhi_epu16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2330	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2331	return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2332	}
				2333
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2334	/// \brief Multiplies the corresponding elements of two signed [8 x i16]
				2335	/// vectors, saving the lower 16 bits of each 32-bit product in the
				2336	/// corresponding element of a 128-bit signed [8 x i16] result vector.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2337	///
				2338	/// \headerfile <x86intrin.h>
				2339	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2340	/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2341	///
				2342	/// \param __a
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2343	/// A 128-bit signed [8 x i16] vector.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2344	/// \param __b
Ekaterina Romanova	06477bf	2016-10-23 07:30:50 +0000	[diff] [blame]	2345	/// A 128-bit signed [8 x i16] vector.
				2346	/// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
				2347	/// each of the eight 32-bit products.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2348	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2349	_mm_mullo_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2350	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2351	return (__m128i)((__v8hu)__a * (__v8hu)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2352	}
				2353
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2354	/// \brief Multiplies 32-bit unsigned integer values contained in the lower bits
				2355	/// of the two 64-bit integer vectors and returns the 64-bit unsigned
				2356	/// product.
				2357	///
				2358	/// \headerfile <x86intrin.h>
				2359	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2360	/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2361	///
				2362	/// \param __a
				2363	/// A 64-bit integer containing one of the source operands.
				2364	/// \param __b
				2365	/// A 64-bit integer containing one of the source operands.
				2366	/// \returns A 64-bit integer vector containing the product of both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2367	static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2368	_mm_mul_su32(__m64 __a, __m64 __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2369	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2370	return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2371	}
				2372
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2373	/// \brief Multiplies 32-bit unsigned integer values contained in the lower
				2374	/// bits of the corresponding elements of two [2 x i64] vectors, and returns
				2375	/// the 64-bit products in the corresponding elements of a [2 x i64] vector.
				2376	///
				2377	/// \headerfile <x86intrin.h>
				2378	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2379	/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2380	///
				2381	/// \param __a
				2382	/// A [2 x i64] vector containing one of the source operands.
				2383	/// \param __b
				2384	/// A [2 x i64] vector containing one of the source operands.
				2385	/// \returns A [2 x i64] vector containing the product of both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2386	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2387	_mm_mul_epu32(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2388	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2389	return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2390	}
				2391
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2392	/// \brief Computes the absolute differences of corresponding 8-bit integer
				2393	/// values in two 128-bit vectors. Sums the first 8 absolute differences, and
				2394	/// separately sums the second 8 absolute differences. Packs these two
				2395	/// unsigned 16-bit integer sums into the upper and lower elements of a
				2396	/// [2 x i64] vector.
				2397	///
				2398	/// \headerfile <x86intrin.h>
				2399	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2400	/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2401	///
				2402	/// \param __a
				2403	/// A 128-bit integer vector containing one of the source operands.
				2404	/// \param __b
				2405	/// A 128-bit integer vector containing one of the source operands.
				2406	/// \returns A [2 x i64] vector containing the sums of the sets of absolute
				2407	/// differences between both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2408	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2409	_mm_sad_epu8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2410	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2411	return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2412	}
				2413
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2414	/// \brief Subtracts the corresponding 8-bit integer values in the operands.
				2415	///
				2416	/// \headerfile <x86intrin.h>
				2417	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2418	/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2419	///
				2420	/// \param __a
				2421	/// A 128-bit integer vector containing the minuends.
				2422	/// \param __b
				2423	/// A 128-bit integer vector containing the subtrahends.
				2424	/// \returns A 128-bit integer vector containing the differences of the values
				2425	/// in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2426	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2427	_mm_sub_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2428	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2429	return (__m128i)((__v16qu)__a - (__v16qu)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2430	}
				2431
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2432	/// \brief Subtracts the corresponding 16-bit integer values in the operands.
				2433	///
				2434	/// \headerfile <x86intrin.h>
				2435	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2436	/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2437	///
				2438	/// \param __a
				2439	/// A 128-bit integer vector containing the minuends.
				2440	/// \param __b
				2441	/// A 128-bit integer vector containing the subtrahends.
				2442	/// \returns A 128-bit integer vector containing the differences of the values
				2443	/// in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2444	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2445	_mm_sub_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2446	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2447	return (__m128i)((__v8hu)__a - (__v8hu)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2448	}
				2449
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2450	/// \brief Subtracts the corresponding 32-bit integer values in the operands.
				2451	///
				2452	/// \headerfile <x86intrin.h>
				2453	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2454	/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2455	///
				2456	/// \param __a
				2457	/// A 128-bit integer vector containing the minuends.
				2458	/// \param __b
				2459	/// A 128-bit integer vector containing the subtrahends.
				2460	/// \returns A 128-bit integer vector containing the differences of the values
				2461	/// in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2462	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2463	_mm_sub_epi32(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2464	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2465	return (__m128i)((__v4su)__a - (__v4su)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2466	}
				2467
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2468	/// \brief Subtracts signed or unsigned 64-bit integer values and writes the
				2469	/// difference to the corresponding bits in the destination.
				2470	///
				2471	/// \headerfile <x86intrin.h>
				2472	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2473	/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2474	///
				2475	/// \param __a
				2476	/// A 64-bit integer vector containing the minuend.
				2477	/// \param __b
				2478	/// A 64-bit integer vector containing the subtrahend.
				2479	/// \returns A 64-bit integer vector containing the difference of the values in
				2480	/// the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2481	static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2482	_mm_sub_si64(__m64 __a, __m64 __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2483	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	2484	return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2485	}
				2486
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2487	/// \brief Subtracts the corresponding elements of two [2 x i64] vectors.
				2488	///
				2489	/// \headerfile <x86intrin.h>
				2490	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2491	/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2492	///
				2493	/// \param __a
				2494	/// A 128-bit integer vector containing the minuends.
				2495	/// \param __b
				2496	/// A 128-bit integer vector containing the subtrahends.
				2497	/// \returns A 128-bit integer vector containing the differences of the values
				2498	/// in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2499	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2500	_mm_sub_epi64(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2501	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2502	return (__m128i)((__v2du)__a - (__v2du)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2503	}
				2504
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2505	/// \brief Subtracts corresponding 8-bit signed integer values in the input and
				2506	/// returns the differences in the corresponding bytes in the destination.
				2507	/// Differences greater than 7Fh are saturated to 7Fh, and differences less
				2508	/// than 80h are saturated to 80h.
				2509	///
				2510	/// \headerfile <x86intrin.h>
				2511	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2512	/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2513	///
				2514	/// \param __a
				2515	/// A 128-bit integer vector containing the minuends.
				2516	/// \param __b
				2517	/// A 128-bit integer vector containing the subtrahends.
				2518	/// \returns A 128-bit integer vector containing the differences of the values
				2519	/// in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2520	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2521	_mm_subs_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2522	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2523	return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2524	}
				2525
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2526	/// \brief Subtracts corresponding 16-bit signed integer values in the input and
				2527	/// returns the differences in the corresponding 16-bit elements in the
				2528	/// destination. Differences greater than 7FFFh are saturated to 7FFFh, and
				2529	/// differences less than 8000h are saturated to 8000h.
				2530	///
				2531	/// \headerfile <x86intrin.h>
				2532	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2533	/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2534	///
				2535	/// \param __a
				2536	/// A 128-bit integer vector containing the minuends.
				2537	/// \param __b
				2538	/// A 128-bit integer vector containing the subtrahends.
				2539	/// \returns A 128-bit integer vector containing the differences of the values
				2540	/// in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2541	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2542	_mm_subs_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2543	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2544	return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2545	}
				2546
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2547	/// \brief Subtracts corresponding 8-bit unsigned integer values in the input
				2548	/// and returns the differences in the corresponding bytes in the
				2549	/// destination. Differences less than 00h are saturated to 00h.
				2550	///
				2551	/// \headerfile <x86intrin.h>
				2552	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2553	/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2554	///
				2555	/// \param __a
				2556	/// A 128-bit integer vector containing the minuends.
				2557	/// \param __b
				2558	/// A 128-bit integer vector containing the subtrahends.
				2559	/// \returns A 128-bit integer vector containing the unsigned integer
				2560	/// differences of the values in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2561	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2562	_mm_subs_epu8(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2563	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2564	return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2565	}
				2566
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2567	/// \brief Subtracts corresponding 16-bit unsigned integer values in the input
				2568	/// and returns the differences in the corresponding 16-bit elements in the
				2569	/// destination. Differences less than 0000h are saturated to 0000h.
				2570	///
				2571	/// \headerfile <x86intrin.h>
				2572	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2573	/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2574	///
				2575	/// \param __a
				2576	/// A 128-bit integer vector containing the minuends.
				2577	/// \param __b
				2578	/// A 128-bit integer vector containing the subtrahends.
				2579	/// \returns A 128-bit integer vector containing the unsigned integer
				2580	/// differences of the values in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2581	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2582	_mm_subs_epu16(__m128i __a, __m128i __b)
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2583	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2584	return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	a283f91	2008-12-24 02:41:00 +0000	[diff] [blame]	2585	}
				2586
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2587	/// \brief Performs a bitwise AND of two 128-bit integer vectors.
				2588	///
				2589	/// \headerfile <x86intrin.h>
				2590	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2591	/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2592	///
				2593	/// \param __a
				2594	/// A 128-bit integer vector containing one of the source operands.
				2595	/// \param __b
				2596	/// A 128-bit integer vector containing one of the source operands.
				2597	/// \returns A 128-bit integer vector containing the bitwise AND of the values
				2598	/// in both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2599	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2600	_mm_and_si128(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2601	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2602	return (__m128i)((__v2du)__a & (__v2du)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2603	}
				2604
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2605	/// \brief Performs a bitwise AND of two 128-bit integer vectors, using the
				2606	/// one's complement of the values contained in the first source operand.
				2607	///
				2608	/// \headerfile <x86intrin.h>
				2609	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2610	/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2611	///
				2612	/// \param __a
				2613	/// A 128-bit vector containing the left source operand. The one's complement
				2614	/// of this value is used in the bitwise AND.
				2615	/// \param __b
				2616	/// A 128-bit vector containing the right source operand.
				2617	/// \returns A 128-bit integer vector containing the bitwise AND of the one's
				2618	/// complement of the first operand and the values in the second operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2619	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2620	_mm_andnot_si128(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2621	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2622	return (__m128i)(~(__v2du)__a & (__v2du)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2623	}
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2624	/// \brief Performs a bitwise OR of two 128-bit integer vectors.
				2625	///
				2626	/// \headerfile <x86intrin.h>
				2627	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2628	/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2629	///
				2630	/// \param __a
				2631	/// A 128-bit integer vector containing one of the source operands.
				2632	/// \param __b
				2633	/// A 128-bit integer vector containing one of the source operands.
				2634	/// \returns A 128-bit integer vector containing the bitwise OR of the values
				2635	/// in both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2636	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2637	_mm_or_si128(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2638	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2639	return (__m128i)((__v2du)__a | (__v2du)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2640	}
				2641
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2642	/// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors.
				2643	///
				2644	/// \headerfile <x86intrin.h>
				2645	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2646	/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2647	///
				2648	/// \param __a
				2649	/// A 128-bit integer vector containing one of the source operands.
				2650	/// \param __b
				2651	/// A 128-bit integer vector containing one of the source operands.
				2652	/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
				2653	/// values in both operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2654	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2655	_mm_xor_si128(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2656	{
Craig Topper	6a77b62	2016-06-04 05:43:41 +0000	[diff] [blame]	2657	return (__m128i)((__v2du)__a ^ (__v2du)__b); // XOR is evaluated on the __v2du views, then cast back to __m128i
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2658	}
				2659
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2660	/// \brief Left-shifts the 128-bit integer vector operand by the specified
				2661	/// number of bytes. Low-order bytes are filled with zeros.
				2662	///
				2663	/// \headerfile <x86intrin.h>
				2664	///
				2665	/// \code
				2666	/// __m128i _mm_slli_si128(__m128i a, const int imm);
				2667	/// \endcode
				2668	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2669	/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2670	///
				2671	/// \param a
				2672	/// A 128-bit integer vector containing the source operand.
				2673	/// \param imm
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	2674	/// An immediate value specifying the number of bytes to left-shift operand
				2675	/// \a a. If any of bits [7:4] of (char)imm are set, the result is all zeros.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2676	/// \returns A 128-bit integer vector containing the left-shifted value.
Craig Topper	50e3dfe	2016-06-25 07:31:14 +0000	[diff] [blame]	2677	#define _mm_slli_si128(a, imm) __extension__ ({ \
				2678	(__m128i)__builtin_shufflevector( \
				2679	(__v16qi)_mm_setzero_si128(), \
				2680	(__v16qi)(__m128i)(a), \
				2681	((char)(imm)&0xF0) ? 0 : 16 - (char)(imm), \
				2682	((char)(imm)&0xF0) ? 1 : 17 - (char)(imm), \
				2683	((char)(imm)&0xF0) ? 2 : 18 - (char)(imm), \
				2684	((char)(imm)&0xF0) ? 3 : 19 - (char)(imm), \
				2685	((char)(imm)&0xF0) ? 4 : 20 - (char)(imm), \
				2686	((char)(imm)&0xF0) ? 5 : 21 - (char)(imm), \
				2687	((char)(imm)&0xF0) ? 6 : 22 - (char)(imm), \
				2688	((char)(imm)&0xF0) ? 7 : 23 - (char)(imm), \
				2689	((char)(imm)&0xF0) ? 8 : 24 - (char)(imm), \
				2690	((char)(imm)&0xF0) ? 9 : 25 - (char)(imm), \
				2691	((char)(imm)&0xF0) ? 10 : 26 - (char)(imm), \
				2692	((char)(imm)&0xF0) ? 11 : 27 - (char)(imm), \
				2693	((char)(imm)&0xF0) ? 12 : 28 - (char)(imm), \
				2694	((char)(imm)&0xF0) ? 13 : 29 - (char)(imm), \
				2695	((char)(imm)&0xF0) ? 14 : 30 - (char)(imm), \
				2696	((char)(imm)&0xF0) ? 15 : 31 - (char)(imm)); })
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2697
Craig Topper	a462482	2015-02-13 06:04:45 +0000	[diff] [blame]	2698	#define _mm_bslli_si128(a, imm) /* alias: expands to _mm_slli_si128 with the same arguments */ \
				2699	_mm_slli_si128((a), (imm))
				2700
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2701	/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
				2702	/// by the specified number of bits. Low-order bits are cleared.
				2703	///
				2704	/// \headerfile <x86intrin.h>
				2705	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2706	/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2707	///
				2708	/// \param __a
				2709	/// A 128-bit integer vector containing the source operand.
				2710	/// \param __count
				2711	/// An integer value specifying the number of bits to left-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2712	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2713	/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2714	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2715	_mm_slli_epi16(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2716	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2717	return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); // __a is viewed as __v8hi; __count is forwarded unchanged
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2718	}
				2719
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2720	/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
				2721	/// by the specified number of bits. Low-order bits are cleared.
				2722	///
				2723	/// \headerfile <x86intrin.h>
				2724	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2725	/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2726	///
				2727	/// \param __a
				2728	/// A 128-bit integer vector containing the source operand.
				2729	/// \param __count
				2730	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2731	/// to left-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2732	/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2733	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2734	_mm_sll_epi16(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2735	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2736	return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); // both operands are viewed as __v8hi for the builtin call
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2737	}
				2738
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2739	/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
				2740	/// by the specified number of bits. Low-order bits are cleared.
				2741	///
				2742	/// \headerfile <x86intrin.h>
				2743	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2744	/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2745	///
				2746	/// \param __a
				2747	/// A 128-bit integer vector containing the source operand.
				2748	/// \param __count
				2749	/// An integer value specifying the number of bits to left-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2750	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2751	/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2752	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2753	_mm_slli_epi32(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2754	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2755	return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); // __a is viewed as __v4si; __count is forwarded unchanged
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2756	}
				2757
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2758	/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
				2759	/// by the specified number of bits. Low-order bits are cleared.
				2760	///
				2761	/// \headerfile <x86intrin.h>
				2762	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2763	/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2764	///
				2765	/// \param __a
				2766	/// A 128-bit integer vector containing the source operand.
				2767	/// \param __count
				2768	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2769	/// to left-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2770	/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2771	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2772	_mm_sll_epi32(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2773	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2774	return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); // both operands are viewed as __v4si for the builtin call
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2775	}
				2776
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2777	/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
				2778	/// by the specified number of bits. Low-order bits are cleared.
				2779	///
				2780	/// \headerfile <x86intrin.h>
				2781	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2782	/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2783	///
				2784	/// \param __a
				2785	/// A 128-bit integer vector containing the source operand.
				2786	/// \param __count
				2787	/// An integer value specifying the number of bits to left-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2788	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2789	/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2790	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2791	_mm_slli_epi64(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2792	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	2793	return __builtin_ia32_psllqi128((__v2di)__a, __count); // builtin result is returned without an explicit (__m128i) cast
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2794	}
				2795
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2796	/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
				2797	/// by the specified number of bits. Low-order bits are cleared.
				2798	///
				2799	/// \headerfile <x86intrin.h>
				2800	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2801	/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2802	///
				2803	/// \param __a
				2804	/// A 128-bit integer vector containing the source operand.
				2805	/// \param __count
				2806	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2807	/// to left-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2808	/// \returns A 128-bit integer vector containing the left-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2809	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2810	_mm_sll_epi64(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2811	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	2812	return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); // builtin result is returned without an explicit (__m128i) cast
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2813	}
				2814
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2815	/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
				2816	/// by the specified number of bits. High-order bits are filled with the sign
				2817	/// bit of the initial value.
				2818	///
				2819	/// \headerfile <x86intrin.h>
				2820	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2821	/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2822	///
				2823	/// \param __a
				2824	/// A 128-bit integer vector containing the source operand.
				2825	/// \param __count
				2826	/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2827	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2828	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2829	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2830	_mm_srai_epi16(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2831	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2832	return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); // __a is viewed as __v8hi; __count is forwarded unchanged
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2833	}
				2834
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2835	/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
				2836	/// by the specified number of bits. High-order bits are filled with the sign
				2837	/// bit of the initial value.
				2838	///
				2839	/// \headerfile <x86intrin.h>
				2840	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2841	/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2842	///
				2843	/// \param __a
				2844	/// A 128-bit integer vector containing the source operand.
				2845	/// \param __count
				2846	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2847	/// to right-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2848	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2849	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2850	_mm_sra_epi16(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2851	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2852	return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); // both operands are viewed as __v8hi for the builtin call
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2853	}
				2854
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2855	/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
				2856	/// by the specified number of bits. High-order bits are filled with the sign
				2857	/// bit of the initial value.
				2858	///
				2859	/// \headerfile <x86intrin.h>
				2860	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2861	/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2862	///
				2863	/// \param __a
				2864	/// A 128-bit integer vector containing the source operand.
				2865	/// \param __count
				2866	/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2867	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2868	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2869	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2870	_mm_srai_epi32(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2871	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2872	return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); // __a is viewed as __v4si; __count is forwarded unchanged
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2873	}
				2874
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2875	/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
				2876	/// by the specified number of bits. High-order bits are filled with the sign
				2877	/// bit of the initial value.
				2878	///
				2879	/// \headerfile <x86intrin.h>
				2880	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2881	/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2882	///
				2883	/// \param __a
				2884	/// A 128-bit integer vector containing the source operand.
				2885	/// \param __count
				2886	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2887	/// to right-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2888	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2889	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2890	_mm_sra_epi32(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2891	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2892	return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); // both operands are viewed as __v4si for the builtin call
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2893	}
				2894
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2895	/// \brief Right-shifts the 128-bit integer vector operand by the specified
				2896	/// number of bytes. High-order bytes are filled with zeros.
				2897	///
				2898	/// \headerfile <x86intrin.h>
				2899	///
				2900	/// \code
				2901	/// __m128i _mm_srli_si128(__m128i a, const int imm);
				2902	/// \endcode
				2903	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2904	/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2905	///
				2906	/// \param a
				2907	/// A 128-bit integer vector containing the source operand.
				2908	/// \param imm
				2909	/// An immediate value specifying the number of bytes to right-shift operand
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2910	/// \a a. If any of bits [7:4] of (char)imm are set, the result is all zeros.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2911	/// \returns A 128-bit integer vector containing the right-shifted value.
Craig Topper	50e3dfe	2016-06-25 07:31:14 +0000	[diff] [blame]	2912	#define _mm_srli_si128(a, imm) __extension__ ({ \
				2913	(__m128i)__builtin_shufflevector( \
				2914	(__v16qi)(__m128i)(a), \
				2915	(__v16qi)_mm_setzero_si128(), \
				2916	((char)(imm)&0xF0) ? 16 : (char)(imm) + 0, \
				2917	((char)(imm)&0xF0) ? 17 : (char)(imm) + 1, \
				2918	((char)(imm)&0xF0) ? 18 : (char)(imm) + 2, \
				2919	((char)(imm)&0xF0) ? 19 : (char)(imm) + 3, \
				2920	((char)(imm)&0xF0) ? 20 : (char)(imm) + 4, \
				2921	((char)(imm)&0xF0) ? 21 : (char)(imm) + 5, \
				2922	((char)(imm)&0xF0) ? 22 : (char)(imm) + 6, \
				2923	((char)(imm)&0xF0) ? 23 : (char)(imm) + 7, \
				2924	((char)(imm)&0xF0) ? 24 : (char)(imm) + 8, \
				2925	((char)(imm)&0xF0) ? 25 : (char)(imm) + 9, \
				2926	((char)(imm)&0xF0) ? 26 : (char)(imm) + 10, \
				2927	((char)(imm)&0xF0) ? 27 : (char)(imm) + 11, \
				2928	((char)(imm)&0xF0) ? 28 : (char)(imm) + 12, \
				2929	((char)(imm)&0xF0) ? 29 : (char)(imm) + 13, \
				2930	((char)(imm)&0xF0) ? 30 : (char)(imm) + 14, \
				2931	((char)(imm)&0xF0) ? 31 : (char)(imm) + 15); })
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2932
Craig Topper	a462482	2015-02-13 06:04:45 +0000	[diff] [blame]	2933	#define _mm_bsrli_si128(a, imm) /* alias: expands to _mm_srli_si128 with the same arguments */ \
				2934	_mm_srli_si128((a), (imm))
				2935
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2936	/// \brief Right-shifts each 16-bit value in the 128-bit integer vector
				2937	/// operand by the specified number of bits. High-order bits are cleared.
				2938	///
				2939	/// \headerfile <x86intrin.h>
				2940	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2941	/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2942	///
				2943	/// \param __a
				2944	/// A 128-bit integer vector containing the source operand.
				2945	/// \param __count
				2946	/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2947	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2948	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2949	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2950	_mm_srli_epi16(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2951	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2952	return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); // __a is viewed as __v8hi; __count is forwarded unchanged
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2953	}
				2954
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2955	/// \brief Right-shifts each 16-bit value in the 128-bit integer vector
				2956	/// operand by the specified number of bits. High-order bits are cleared.
				2957	///
				2958	/// \headerfile <x86intrin.h>
				2959	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2960	/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2961	///
				2962	/// \param __a
				2963	/// A 128-bit integer vector containing the source operand.
				2964	/// \param __count
				2965	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2966	/// to right-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2967	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2968	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2969	_mm_srl_epi16(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2970	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2971	return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); // both operands are viewed as __v8hi for the builtin call
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2972	}
				2973
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2974	/// \brief Right-shifts each 32-bit value in the 128-bit integer vector
				2975	/// operand by the specified number of bits. High-order bits are cleared.
				2976	///
				2977	/// \headerfile <x86intrin.h>
				2978	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2979	/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2980	///
				2981	/// \param __a
				2982	/// A 128-bit integer vector containing the source operand.
				2983	/// \param __count
				2984	/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	2985	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2986	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	2987	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2988	_mm_srli_epi32(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2989	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	2990	return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); // __a is viewed as __v4si; __count is forwarded unchanged
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	2991	}
				2992
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2993	/// \brief Right-shifts each 32-bit value in the 128-bit integer vector
				2994	/// operand by the specified number of bits. High-order bits are cleared.
				2995	///
				2996	/// \headerfile <x86intrin.h>
				2997	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	2998	/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	2999	///
				3000	/// \param __a
				3001	/// A 128-bit integer vector containing the source operand.
				3002	/// \param __count
				3003	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	3004	/// to right-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3005	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3006	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3007	_mm_srl_epi32(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3008	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3009	return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); // both operands are viewed as __v4si for the builtin call
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3010	}
				3011
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3012	/// \brief Right-shifts each 64-bit value in the 128-bit integer vector
				3013	/// operand by the specified number of bits. High-order bits are cleared.
				3014	///
				3015	/// \headerfile <x86intrin.h>
				3016	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3017	/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3018	///
				3019	/// \param __a
				3020	/// A 128-bit integer vector containing the source operand.
				3021	/// \param __count
				3022	/// An integer value specifying the number of bits to right-shift each value
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	3023	/// in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3024	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3025	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3026	_mm_srli_epi64(__m128i __a, int __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3027	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	3028	return __builtin_ia32_psrlqi128((__v2di)__a, __count); // builtin result is returned without an explicit (__m128i) cast
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3029	}
				3030
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3031	/// \brief Right-shifts each 64-bit value in the 128-bit integer vector
				3032	/// operand by the specified number of bits. High-order bits are cleared.
				3033	///
				3034	/// \headerfile <x86intrin.h>
				3035	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3036	/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3037	///
				3038	/// \param __a
				3039	/// A 128-bit integer vector containing the source operand.
				3040	/// \param __count
				3041	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	3042	/// to right-shift each value in operand \a __a.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3043	/// \returns A 128-bit integer vector containing the right-shifted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3044	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3045	_mm_srl_epi64(__m128i __a, __m128i __count)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3046	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	3047	return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); // builtin result is returned without an explicit (__m128i) cast
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3048	}
				3049
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3050	/// \brief Compares each of the corresponding 8-bit values of the 128-bit
				3051	/// integer vectors for equality. Each comparison yields 0h for false, FFh
				3052	/// for true.
				3053	///
				3054	/// \headerfile <x86intrin.h>
				3055	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3056	/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3057	///
				3058	/// \param __a
				3059	/// A 128-bit integer vector.
				3060	/// \param __b
				3061	/// A 128-bit integer vector.
				3062	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3063	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3064	_mm_cmpeq_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3065	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3066	return (__m128i)((__v16qi)__a == (__v16qi)__b); // == is applied to the __v16qi views; the result is cast back to __m128i
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3067	}
				3068
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3069	/// \brief Compares each of the corresponding 16-bit values of the 128-bit
				3070	/// integer vectors for equality. Each comparison yields 0h for false, FFFFh
				3071	/// for true.
				3072	///
				3073	/// \headerfile <x86intrin.h>
				3074	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3075	/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3076	///
				3077	/// \param __a
				3078	/// A 128-bit integer vector.
				3079	/// \param __b
				3080	/// A 128-bit integer vector.
				3081	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3082	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3083	_mm_cmpeq_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3084	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3085	return (__m128i)((__v8hi)__a == (__v8hi)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3086	}
				3087
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3088	/// \brief Compares each of the corresponding 32-bit values of the 128-bit
				3089	/// integer vectors for equality. Each comparison yields 0h for false,
				3090	/// FFFFFFFFh for true.
				3091	///
				3092	/// \headerfile <x86intrin.h>
				3093	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3094	/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3095	///
				3096	/// \param __a
				3097	/// A 128-bit integer vector.
				3098	/// \param __b
				3099	/// A 128-bit integer vector.
				3100	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3101	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3102	_mm_cmpeq_epi32(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3103	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3104	return (__m128i)((__v4si)__a == (__v4si)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3105	}
				3106
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3107	/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
				3108	/// integer vectors to determine if the values in the first operand are
				3109	/// greater than those in the second operand. Each comparison yields 0h for
				3110	/// false, FFh for true.
				3111	///
				3112	/// \headerfile <x86intrin.h>
				3113	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3114	/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3115	///
				3116	/// \param __a
				3117	/// A 128-bit integer vector.
				3118	/// \param __b
				3119	/// A 128-bit integer vector.
				3120	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3121	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3122	_mm_cmpgt_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3123	{
Nick Lewycky	d0ba379	2012-02-04 02:16:48 +0000	[diff] [blame]	3124	/* This function always performs a signed comparison, but __v16qi is a char
Chandler Carruth	cbe6411	2015-10-01 23:40:12 +0000	[diff] [blame]	3125	which may be signed or unsigned, so use __v16qs. */
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3126	return (__m128i)((__v16qs)__a > (__v16qs)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3127	}
				3128
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3129	/// \brief Compares each of the corresponding signed 16-bit values of the
				3130	/// 128-bit integer vectors to determine if the values in the first operand
				3131	/// are greater than those in the second operand. Each comparison yields 0h
				3132	/// for false, FFFFh for true.
				3133	///
				3134	/// \headerfile <x86intrin.h>
				3135	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3136	/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3137	///
				3138	/// \param __a
				3139	/// A 128-bit integer vector.
				3140	/// \param __b
				3141	/// A 128-bit integer vector.
				3142	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3143	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3144	_mm_cmpgt_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3145	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3146	return (__m128i)((__v8hi)__a > (__v8hi)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3147	}
				3148
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3149	/// \brief Compares each of the corresponding signed 32-bit values of the
				3150	/// 128-bit integer vectors to determine if the values in the first operand
				3151	/// are greater than those in the second operand. Each comparison yields 0h
				3152	/// for false, FFFFFFFFh for true.
				3153	///
				3154	/// \headerfile <x86intrin.h>
				3155	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3156	/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3157	///
				3158	/// \param __a
				3159	/// A 128-bit integer vector.
				3160	/// \param __b
				3161	/// A 128-bit integer vector.
				3162	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3163	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3164	_mm_cmpgt_epi32(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3165	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3166	return (__m128i)((__v4si)__a > (__v4si)__b);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3167	}
				3168
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3169	/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
				3170	/// integer vectors to determine if the values in the first operand are less
				3171	/// than those in the second operand. Each comparison yields 0h for false,
				3172	/// FFh for true.
				3173	///
				3174	/// \headerfile <x86intrin.h>
				3175	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3176	/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3177	///
				3178	/// \param __a
				3179	/// A 128-bit integer vector.
				3180	/// \param __b
				3181	/// A 128-bit integer vector.
				3182	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3183	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3184	_mm_cmplt_epi8(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3185	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3186	return _mm_cmpgt_epi8(__b, __a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3187	}
				3188
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3189	/// \brief Compares each of the corresponding signed 16-bit values of the
				3190	/// 128-bit integer vectors to determine if the values in the first operand
				3191	/// are less than those in the second operand. Each comparison yields 0h for
				3192	/// false, FFFFh for true.
				3193	///
				3194	/// \headerfile <x86intrin.h>
				3195	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3196	/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3197	///
				3198	/// \param __a
				3199	/// A 128-bit integer vector.
				3200	/// \param __b
				3201	/// A 128-bit integer vector.
				3202	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3203	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3204	_mm_cmplt_epi16(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3205	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3206	return _mm_cmpgt_epi16(__b, __a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3207	}
				3208
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3209	/// \brief Compares each of the corresponding signed 32-bit values of the
				3210	/// 128-bit integer vectors to determine if the values in the first operand
				3211	/// are less than those in the second operand. Each comparison yields 0h for
				3212	/// false, FFFFFFFFh for true.
				3213	///
				3214	/// \headerfile <x86intrin.h>
				3215	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3216	/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3217	///
				3218	/// \param __a
				3219	/// A 128-bit integer vector.
				3220	/// \param __b
				3221	/// A 128-bit integer vector.
				3222	/// \returns A 128-bit integer vector containing the comparison results.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3223	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3224	_mm_cmplt_epi32(__m128i __a, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3225	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3226	return _mm_cmpgt_epi32(__b, __a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3227	}
				3228
				3229	#ifdef __x86_64__
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3230	/// \brief Converts a 64-bit signed integer value from the second operand into a
				3231	/// double-precision value and returns it in the lower element of a [2 x
				3232	/// double] vector; the upper element of the returned vector is copied from
				3233	/// the upper element of the first operand.
				3234	///
				3235	/// \headerfile <x86intrin.h>
				3236	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3237	/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3238	///
				3239	/// \param __a
				3240	/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
				3241	/// copied to the upper 64 bits of the destination.
				3242	/// \param __b
				3243	/// A 64-bit signed integer operand containing the value to be converted.
				3244	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				3245	/// converted value of the second operand. The upper 64 bits are copied from
				3246	/// the upper 64 bits of the first operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3247	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3248	_mm_cvtsi64_sd(__m128d __a, long long __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3249	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3250	__a[0] = __b;
				3251	return __a;
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3252	}
				3253
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3254	/// \brief Converts the first (lower) element of a vector of [2 x double] into a
				3255	/// 64-bit signed integer value, according to the current rounding mode.
				3256	///
				3257	/// \headerfile <x86intrin.h>
				3258	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3259	/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3260	///
				3261	/// \param __a
				3262	/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
				3263	/// conversion.
				3264	/// \returns A 64-bit signed integer containing the converted value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3265	static __inline__ long long __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3266	_mm_cvtsd_si64(__m128d __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3267	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	3268	return __builtin_ia32_cvtsd2si64((__v2df)__a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3269	}
				3270
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3271	/// \brief Converts the first (lower) element of a vector of [2 x double] into a
				3272	/// 64-bit signed integer value, truncating the result when it is inexact.
				3273	///
				3274	/// \headerfile <x86intrin.h>
				3275	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	3276	/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
				3277	/// instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3278	///
				3279	/// \param __a
				3280	/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
				3281	/// conversion.
				3282	/// \returns A 64-bit signed integer containing the converted value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3283	static __inline__ long long __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3284	_mm_cvttsd_si64(__m128d __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3285	{
Simon Pilgrim	e3b9ee0	2016-07-20 10:18:01 +0000	[diff] [blame]	3286	return __builtin_ia32_cvttsd2si64((__v2df)__a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3287	}
				3288	#endif
				3289
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3290	/// \brief Converts a vector of [4 x i32] into a vector of [4 x float].
				3291	///
				3292	/// \headerfile <x86intrin.h>
				3293	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3294	/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3295	///
				3296	/// \param __a
				3297	/// A 128-bit integer vector.
				3298	/// \returns A 128-bit vector of [4 x float] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3299	static __inline__ __m128 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3300	_mm_cvtepi32_ps(__m128i __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3301	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3302	return __builtin_ia32_cvtdq2ps((__v4si)__a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3303	}
				3304
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3305	/// \brief Converts a vector of [4 x float] into a vector of [4 x i32].
				3306	///
				3307	/// \headerfile <x86intrin.h>
				3308	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3309	/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3310	///
				3311	/// \param __a
				3312	/// A 128-bit vector of [4 x float].
				3313	/// \returns A 128-bit integer vector of [4 x i32] containing the converted
				3314	/// values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3315	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3316	_mm_cvtps_epi32(__m128 __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3317	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	3318	return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3319	}
				3320
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3321	/// \brief Converts a vector of [4 x float] into a vector of [4 x i32],
				3322	/// truncating the result when it is inexact.
				3323	///
				3324	/// \headerfile <x86intrin.h>
				3325	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	3326	/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>
				3327	/// instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3328	///
				3329	/// \param __a
				3330	/// A 128-bit vector of [4 x float].
				3331	/// \returns A 128-bit vector of [4 x i32] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3332	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3333	_mm_cvttps_epi32(__m128 __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3334	{
Simon Pilgrim	e3b9ee0	2016-07-20 10:18:01 +0000	[diff] [blame]	3335	return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3336	}
				3337
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3338	/// \brief Returns a vector of [4 x i32] where the lowest element is the input
				3339	/// operand and the remaining elements are zero.
				3340	///
				3341	/// \headerfile <x86intrin.h>
				3342	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3343	/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3344	///
				3345	/// \param __a
				3346	/// A 32-bit signed integer operand.
				3347	/// \returns A 128-bit vector of [4 x i32].
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3348	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3349	_mm_cvtsi32_si128(int __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3350	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3351	return (__m128i)(__v4si){ __a, 0, 0, 0 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3352	}
				3353
				3354	#ifdef __x86_64__
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3355	/// \brief Returns a vector of [2 x i64] where the lower element is the input
				3356	/// operand and the upper element is zero.
				3357	///
				3358	/// \headerfile <x86intrin.h>
				3359	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3360	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3361	///
				3362	/// \param __a
				3363	/// A 64-bit signed integer operand containing the value to be converted.
				3364	/// \returns A 128-bit vector of [2 x i64] containing the converted value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3365	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3366	_mm_cvtsi64_si128(long long __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3367	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3368	return (__m128i){ __a, 0 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3369	}
				3370	#endif
				3371
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3372	/// \brief Moves the least significant 32 bits of a vector of [4 x i32] to a
				3373	/// 32-bit signed integer value.
				3374	///
				3375	/// \headerfile <x86intrin.h>
				3376	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3377	/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3378	///
				3379	/// \param __a
				3380	/// A vector of [4 x i32]. The least significant 32 bits are moved to the
				3381	/// destination.
				3382	/// \returns A 32-bit signed integer containing the moved value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3383	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3384	_mm_cvtsi128_si32(__m128i __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3385	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3386	__v4si __b = (__v4si)__a;
				3387	return __b[0];
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3388	}
				3389
				3390	#ifdef __x86_64__
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3391	/// \brief Moves the least significant 64 bits of a vector of [2 x i64] to a
				3392	/// 64-bit signed integer value.
				3393	///
				3394	/// \headerfile <x86intrin.h>
				3395	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3396	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3397	///
				3398	/// \param __a
				3399	/// A vector of [2 x i64]. The least significant 64 bits are moved to the
				3400	/// destination.
				3401	/// \returns A 64-bit signed integer containing the moved value.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3402	static __inline__ long long __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3403	_mm_cvtsi128_si64(__m128i __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3404	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3405	return __a[0];
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3406	}
				3407	#endif
				3408
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3409	/// \brief Moves packed integer values from an aligned 128-bit memory location
				3410	/// to elements in a 128-bit integer vector.
				3411	///
				3412	/// \headerfile <x86intrin.h>
				3413	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3414	/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3415	///
				3416	/// \param __p
				3417	/// An aligned pointer to a memory location containing integer values.
				3418	/// \returns A 128-bit integer vector containing the moved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3419	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3420	_mm_load_si128(__m128i const *__p)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3421	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3422	return *__p;
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3423	}
				3424
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3425	/// \brief Moves packed integer values from an unaligned 128-bit memory location
				3426	/// to elements in a 128-bit integer vector.
				3427	///
				3428	/// \headerfile <x86intrin.h>
				3429	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3430	/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3431	///
				3432	/// \param __p
				3433	/// A pointer to a memory location containing integer values.
				3434	/// \returns A 128-bit integer vector containing the moved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3435	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3436	_mm_loadu_si128(__m128i const *__p)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3437	{
Bill Wendling	502931f	2011-05-13 00:11:39 +0000	[diff] [blame]	3438	struct __loadu_si128 {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3439	__m128i __v;
David Majnemer	1cf22e6	2015-02-04 00:26:10 +0000	[diff] [blame]	3440	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3441	return ((struct __loadu_si128*)__p)->__v;
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3442	}
				3443
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3444	/// \brief Returns a vector of [2 x i64] where the lower element is taken from
				3445	/// the lower element of the operand, and the upper element is zero.
				3446	///
				3447	/// \headerfile <x86intrin.h>
				3448	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3449	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3450	///
				3451	/// \param __p
				3452	/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
				3453	/// the destination.
				3454	/// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
				3455	/// moved value. The higher order bits are cleared.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3456	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3457	_mm_loadl_epi64(__m128i const *__p)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3458	{
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	3459	struct __mm_loadl_epi64_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3460	long long __u;
Eli Friedman	9bb51ad	2011-09-15 23:15:27 +0000	[diff] [blame]	3461	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3462	return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3463	}
				3464
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3465	/// \brief Generates a 128-bit vector of [4 x i32] with unspecified content.
				3466	/// This could be used as an argument to another intrinsic function where the
				3467	/// argument is required but the value is not actually used.
				3468	///
				3469	/// \headerfile <x86intrin.h>
				3470	///
				3471	/// This intrinsic has no corresponding instruction.
				3472	///
				3473	/// \returns A 128-bit vector of [4 x i32] with unspecified content.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3474	static __inline__ __m128i __DEFAULT_FN_ATTRS
Craig Topper	3a0c726	2016-06-09 05:14:28 +0000	[diff] [blame]	3475	_mm_undefined_si128(void)
Simon Pilgrim	5aba992	2015-08-26 21:17:12 +0000	[diff] [blame]	3476	{
				3477	return (__m128i)__builtin_ia32_undef128();
				3478	}
				3479
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3480	/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
				3481	/// the specified 64-bit integer values.
				3482	///
				3483	/// \headerfile <x86intrin.h>
				3484	///
				3485	/// This intrinsic is a utility function and does not correspond to a specific
				3486	/// instruction.
				3487	///
				3488	/// \param __q1
				3489	/// A 64-bit integer value used to initialize the upper 64 bits of the
				3490	/// destination vector of [2 x i64].
				3491	/// \param __q0
				3492	/// A 64-bit integer value used to initialize the lower 64 bits of the
				3493	/// destination vector of [2 x i64].
				3494	/// \returns An initialized 128-bit vector of [2 x i64] containing the values
				3495	/// provided in the operands.
Simon Pilgrim	5aba992	2015-08-26 21:17:12 +0000	[diff] [blame]	3496	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3497	_mm_set_epi64x(long long __q1, long long __q0)
Anders Carlsson	dfa3117	2009-09-18 17:03:55 +0000	[diff] [blame]	3498	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3499	return (__m128i){ __q0, __q1 };
Anders Carlsson	dfa3117	2009-09-18 17:03:55 +0000	[diff] [blame]	3500	}
				3501
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3502	/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
				3503	/// the specified 64-bit integer values.
				3504	///
				3505	/// \headerfile <x86intrin.h>
				3506	///
				3507	/// This intrinsic is a utility function and does not correspond to a specific
				3508	/// instruction.
				3509	///
				3510	/// \param __q1
				3511	/// A 64-bit integer value used to initialize the upper 64 bits of the
				3512	/// destination vector of [2 x i64].
				3513	/// \param __q0
				3514	/// A 64-bit integer value used to initialize the lower 64 bits of the
				3515	/// destination vector of [2 x i64].
				3516	/// \returns An initialized 128-bit vector of [2 x i64] containing the values
				3517	/// provided in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3518	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3519	_mm_set_epi64(__m64 __q1, __m64 __q0)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3520	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3521	return (__m128i){ (long long)__q0, (long long)__q1 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3522	}
				3523
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3524	/// \brief Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
				3525	/// the specified 32-bit integer values.
				3526	///
				3527	/// \headerfile <x86intrin.h>
				3528	///
				3529	/// This intrinsic is a utility function and does not correspond to a specific
				3530	/// instruction.
				3531	///
				3532	/// \param __i3
				3533	/// A 32-bit integer value used to initialize bits [127:96] of the
				3534	/// destination vector.
				3535	/// \param __i2
				3536	/// A 32-bit integer value used to initialize bits [95:64] of the destination
				3537	/// vector.
				3538	/// \param __i1
				3539	/// A 32-bit integer value used to initialize bits [63:32] of the destination
				3540	/// vector.
				3541	/// \param __i0
				3542	/// A 32-bit integer value used to initialize bits [31:0] of the destination
				3543	/// vector.
				3544	/// \returns An initialized 128-bit vector of [4 x i32] containing the values
				3545	/// provided in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3546	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3547	_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3548	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3549	return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3550	}
				3551
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3552	/// \brief Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
				3553	/// the specified 16-bit integer values.
				3554	///
				3555	/// \headerfile <x86intrin.h>
				3556	///
				3557	/// This intrinsic is a utility function and does not correspond to a specific
				3558	/// instruction.
				3559	///
				3560	/// \param __w7
				3561	/// A 16-bit integer value used to initialize bits [127:112] of the
				3562	/// destination vector.
				3563	/// \param __w6
				3564	/// A 16-bit integer value used to initialize bits [111:96] of the
				3565	/// destination vector.
				3566	/// \param __w5
				3567	/// A 16-bit integer value used to initialize bits [95:80] of the destination
				3568	/// vector.
				3569	/// \param __w4
				3570	/// A 16-bit integer value used to initialize bits [79:64] of the destination
				3571	/// vector.
				3572	/// \param __w3
				3573	/// A 16-bit integer value used to initialize bits [63:48] of the destination
				3574	/// vector.
				3575	/// \param __w2
				3576	/// A 16-bit integer value used to initialize bits [47:32] of the destination
				3577	/// vector.
				3578	/// \param __w1
				3579	/// A 16-bit integer value used to initialize bits [31:16] of the destination
				3580	/// vector.
				3581	/// \param __w0
				3582	/// A 16-bit integer value used to initialize bits [15:0] of the destination
				3583	/// vector.
				3584	/// \returns An initialized 128-bit vector of [8 x i16] containing the values
				3585	/// provided in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3586	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3587	_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3588	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3589	return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3590	}
				3591
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3592	/// \brief Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
				3593	/// the specified 8-bit integer values.
				3594	///
				3595	/// \headerfile <x86intrin.h>
				3596	///
				3597	/// This intrinsic is a utility function and does not correspond to a specific
				3598	/// instruction.
				3599	///
				3600	/// \param __b15
				3601	/// Initializes bits [127:120] of the destination vector.
				3602	/// \param __b14
				3603	/// Initializes bits [119:112] of the destination vector.
				3604	/// \param __b13
				3605	/// Initializes bits [111:104] of the destination vector.
				3606	/// \param __b12
				3607	/// Initializes bits [103:96] of the destination vector.
				3608	/// \param __b11
				3609	/// Initializes bits [95:88] of the destination vector.
				3610	/// \param __b10
				3611	/// Initializes bits [87:80] of the destination vector.
				3612	/// \param __b9
				3613	/// Initializes bits [79:72] of the destination vector.
				3614	/// \param __b8
				3615	/// Initializes bits [71:64] of the destination vector.
				3616	/// \param __b7
				3617	/// Initializes bits [63:56] of the destination vector.
				3618	/// \param __b6
				3619	/// Initializes bits [55:48] of the destination vector.
				3620	/// \param __b5
				3621	/// Initializes bits [47:40] of the destination vector.
				3622	/// \param __b4
				3623	/// Initializes bits [39:32] of the destination vector.
				3624	/// \param __b3
				3625	/// Initializes bits [31:24] of the destination vector.
				3626	/// \param __b2
				3627	/// Initializes bits [23:16] of the destination vector.
				3628	/// \param __b1
				3629	/// Initializes bits [15:8] of the destination vector.
				3630	/// \param __b0
				3631	/// Initializes bits [7:0] of the destination vector.
				3632	/// \returns An initialized 128-bit vector of [16 x i8] containing the values
				3633	/// provided in the operands.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3634	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3635	_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3636	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3637	return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3638	}
				3639
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3640	/// \brief Initializes both values in a 128-bit integer vector with the
				3641	/// specified 64-bit integer value.
				3642	///
				3643	/// \headerfile <x86intrin.h>
				3644	///
				3645	/// This intrinsic is a utility function and does not correspond to a specific
				3646	/// instruction.
				3647	///
				3648	/// \param __q
				3649	/// Integer value used to initialize the elements of the destination integer
				3650	/// vector.
				3651	/// \returns An initialized 128-bit integer vector of [2 x i64] with both
				3652	/// elements containing the value provided in the operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3653	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3654	_mm_set1_epi64x(long long __q)
Anders Carlsson	dfa3117	2009-09-18 17:03:55 +0000	[diff] [blame]	3655	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3656	return (__m128i){ __q, __q };
Anders Carlsson	dfa3117	2009-09-18 17:03:55 +0000	[diff] [blame]	3657	}
				3658
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3659	/// \brief Initializes both values in a 128-bit vector of [2 x i64] with the
				3660	/// specified 64-bit value.
				3661	///
				3662	/// \headerfile <x86intrin.h>
				3663	///
				3664	/// This intrinsic is a utility function and does not correspond to a specific
				3665	/// instruction.
				3666	///
				3667	/// \param __q
				3668	/// A 64-bit value used to initialize the elements of the destination integer
				3669	/// vector.
				3670	/// \returns An initialized 128-bit vector of [2 x i64] with all elements
				3671	/// containing the value provided in the operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3672	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3673	_mm_set1_epi64(__m64 __q)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3674	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3675	return (__m128i){ (long long)__q, (long long)__q };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3676	}
				3677
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3678	/// \brief Initializes all values in a 128-bit vector of [4 x i32] with the
				3679	/// specified 32-bit value.
				3680	///
				3681	/// \headerfile <x86intrin.h>
				3682	///
				3683	/// This intrinsic is a utility function and does not correspond to a specific
				3684	/// instruction.
				3685	///
				3686	/// \param __i
				3687	/// A 32-bit value used to initialize the elements of the destination integer
				3688	/// vector.
				3689	/// \returns An initialized 128-bit vector of [4 x i32] with all elements
				3690	/// containing the value provided in the operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3691	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3692	_mm_set1_epi32(int __i)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3693	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3694	return (__m128i)(__v4si){ __i, __i, __i, __i };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3695	}
				3696
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3697	/// \brief Initializes all values in a 128-bit vector of [8 x i16] with the
				3698	/// specified 16-bit value.
				3699	///
				3700	/// \headerfile <x86intrin.h>
				3701	///
				3702	/// This intrinsic is a utility function and does not correspond to a specific
				3703	/// instruction.
				3704	///
				3705	/// \param __w
				3706	/// A 16-bit value used to initialize the elements of the destination integer
				3707	/// vector.
				3708	/// \returns An initialized 128-bit vector of [8 x i16] with all elements
				3709	/// containing the value provided in the operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3710	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3711	_mm_set1_epi16(short __w)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3712	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3713	return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3714	}
				3715
Ekaterina Romanova	f2ed620	2016-04-08 20:45:48 +0000	[diff] [blame]	3716	/// \brief Initializes all values in a 128-bit vector of [16 x i8] with the
				3717	/// specified 8-bit value.
				3718	///
				3719	/// \headerfile <x86intrin.h>
				3720	///
				3721	/// This intrinsic is a utility function and does not correspond to a specific
				3722	/// instruction.
				3723	///
				3724	/// \param __b
				3725	/// An 8-bit value used to initialize the elements of the destination integer
				3726	/// vector.
				3727	/// \returns An initialized 128-bit vector of [16 x i8] with all elements
				3728	/// containing the value provided in the operand.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3729	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3730	_mm_set1_epi8(char __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3731	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3732	return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3733	}
				3734
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3735	/// \brief Constructs a 128-bit integer vector, initialized in reverse order
				3736	/// with the specified 64-bit integral values.
				3737	///
				3738	/// \headerfile <x86intrin.h>
				3739	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	3740	/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
				3741	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3742	///
				3743	/// \param __q0
				3744	/// A 64-bit integral value used to initialize the lower 64 bits of the
				3745	/// result.
				3746	/// \param __q1
				3747	/// A 64-bit integral value used to initialize the upper 64 bits of the
				3748	/// result.
				3749	/// \returns An initialized 128-bit integer vector.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3750	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3751	_mm_setr_epi64(__m64 __q0, __m64 __q1)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3752	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3753	return (__m128i){ (long long)__q0, (long long)__q1 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3754	}
				3755
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3756	/// \brief Constructs a 128-bit integer vector, initialized in reverse order
				3757	/// with the specified 32-bit integral values.
				3758	///
				3759	/// \headerfile <x86intrin.h>
				3760	///
				3761	/// This intrinsic is a utility function and does not correspond to a specific
				3762	/// instruction.
				3763	///
				3764	/// \param __i0
				3765	/// A 32-bit integral value used to initialize bits [31:0] of the result.
				3766	/// \param __i1
				3767	/// A 32-bit integral value used to initialize bits [63:32] of the result.
				3768	/// \param __i2
				3769	/// A 32-bit integral value used to initialize bits [95:64] of the result.
				3770	/// \param __i3
				3771	/// A 32-bit integral value used to initialize bits [127:96] of the result.
				3772	/// \returns An initialized 128-bit integer vector.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3773	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3774	_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3775	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3776	return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3777	}
				3778
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3779	/// \brief Constructs a 128-bit integer vector, initialized in reverse order
				3780	/// with the specified 16-bit integral values.
				3781	///
				3782	/// \headerfile <x86intrin.h>
				3783	///
				3784	/// This intrinsic is a utility function and does not correspond to a specific
				3785	/// instruction.
				3786	///
				3787	/// \param __w0
				3788	/// A 16-bit integral value used to initialize bits [15:0] of the result.
				3789	/// \param __w1
				3790	/// A 16-bit integral value used to initialize bits [31:16] of the result.
				3791	/// \param __w2
				3792	/// A 16-bit integral value used to initialize bits [47:32] of the result.
				3793	/// \param __w3
				3794	/// A 16-bit integral value used to initialize bits [63:48] of the result.
				3795	/// \param __w4
				3796	/// A 16-bit integral value used to initialize bits [79:64] of the result.
				3797	/// \param __w5
				3798	/// A 16-bit integral value used to initialize bits [95:80] of the result.
				3799	/// \param __w6
				3800	/// A 16-bit integral value used to initialize bits [111:96] of the result.
				3801	/// \param __w7
				3802	/// A 16-bit integral value used to initialize bits [127:112] of the result.
				3803	/// \returns An initialized 128-bit integer vector.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3804	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3805	_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3806	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3807	return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3808	}
				3809
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3810	/// \brief Constructs a 128-bit integer vector, initialized in reverse order
				3811	/// with the specified 8-bit integral values.
				3812	///
				3813	/// \headerfile <x86intrin.h>
				3814	///
				3815	/// This intrinsic is a utility function and does not correspond to a specific
				3816	/// instruction.
				3817	///
				3818	/// \param __b0
				3819	/// An 8-bit integral value used to initialize bits [7:0] of the result.
				3820	/// \param __b1
				3821	/// An 8-bit integral value used to initialize bits [15:8] of the result.
				3822	/// \param __b2
				3823	/// An 8-bit integral value used to initialize bits [23:16] of the result.
				3824	/// \param __b3
				3825	/// An 8-bit integral value used to initialize bits [31:24] of the result.
				3826	/// \param __b4
				3827	/// An 8-bit integral value used to initialize bits [39:32] of the result.
				3828	/// \param __b5
				3829	/// An 8-bit integral value used to initialize bits [47:40] of the result.
				3830	/// \param __b6
				3831	/// An 8-bit integral value used to initialize bits [55:48] of the result.
				3832	/// \param __b7
				3833	/// An 8-bit integral value used to initialize bits [63:56] of the result.
				3834	/// \param __b8
				3835	/// An 8-bit integral value used to initialize bits [71:64] of the result.
				3836	/// \param __b9
				3837	/// An 8-bit integral value used to initialize bits [79:72] of the result.
				3838	/// \param __b10
				3839	/// An 8-bit integral value used to initialize bits [87:80] of the result.
				3840	/// \param __b11
				3841	/// An 8-bit integral value used to initialize bits [95:88] of the result.
				3842	/// \param __b12
				3843	/// An 8-bit integral value used to initialize bits [103:96] of the result.
				3844	/// \param __b13
				3845	/// An 8-bit integral value used to initialize bits [111:104] of the result.
				3846	/// \param __b14
				3847	/// An 8-bit integral value used to initialize bits [119:112] of the result.
				3848	/// \param __b15
				3849	/// An 8-bit integral value used to initialize bits [127:120] of the result.
				3850	/// \returns An initialized 128-bit integer vector.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3851	static __inline__ __m128i __DEFAULT_FN_ATTRS
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3852	_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3853	{
Michael Kuperstein	5c2cb0e	2015-09-21 11:45:27 +0000	[diff] [blame]	3854	return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3855	}
				3856
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3857	/// \brief Creates a 128-bit integer vector initialized to zero.
				3858	///
				3859	/// \headerfile <x86intrin.h>
				3860	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3861	/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3862	///
				3863	/// \returns An initialized 128-bit integer vector with all elements set to
				3864	/// zero.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3865	static __inline__ __m128i __DEFAULT_FN_ATTRS
Mike Stump	5b31ed3	2009-02-13 14:24:50 +0000	[diff] [blame]	3866	_mm_setzero_si128(void)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3867	{
				3868	return (__m128i){ 0LL, 0LL };
				3869	}
				3870
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3871	/// \brief Stores a 128-bit integer vector to a memory location aligned on a
				3872	/// 128-bit boundary.
				3873	///
				3874	/// \headerfile <x86intrin.h>
				3875	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3876	/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3877	///
				3878	/// \param __p
				3879	/// A pointer to an aligned memory location that will receive the integer
				3880	/// values.
				3881	/// \param __b
				3882	/// A 128-bit integer vector containing the values to be moved.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3883	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3884	_mm_store_si128(__m128i *__p, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3885	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3886	*__p = __b;
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3887	}
				3888
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3889	/// \brief Stores a 128-bit integer vector to an unaligned memory location.
				3890	///
				3891	/// \headerfile <x86intrin.h>
				3892	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3893	/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3894	///
				3895	/// \param __p
				3896	/// A pointer to a memory location that will receive the integer values.
				3897	/// \param __b
				3898	/// A 128-bit integer vector containing the values to be moved.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3899	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3900	_mm_storeu_si128(__m128i *__p, __m128i __b)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3901	{
Craig Topper	09175da	2016-05-30 17:10:30 +0000	[diff] [blame]	3902	struct __storeu_si128 {
				3903	__m128i __v;
				3904	} __attribute__((__packed__, __may_alias__));
				3905	((struct __storeu_si128*)__p)->__v = __b;
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3906	}
				3907
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3908	/// \brief Moves bytes selected by the mask from the first operand to the
				3909	/// specified unaligned memory location. When a mask bit is 1, the
				3910	/// corresponding byte is written; otherwise it is not written. To minimize
				3911	/// caching, the data is flagged as non-temporal (unlikely to be used again
				3912	/// soon). Exception and trap behavior for elements not selected for storage
				3913	/// to memory are implementation dependent.
				3914	///
				3915	/// \headerfile <x86intrin.h>
				3916	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	3917	/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>
				3918	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3919	///
				3920	/// \param __d
				3921	/// A 128-bit integer vector containing the values to be moved.
				3922	/// \param __n
				3923	/// A 128-bit integer vector containing the mask. The most significant bit of
				3924	/// each byte is the mask bit for that byte.
				3925	/// \param __p
				3926	/// A pointer to an unaligned 128-bit memory location where the specified
				3927	/// values are moved.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3928	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3929	_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3930	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3931	__builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3932	}
				3933
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3934	/// \brief Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
				3935	/// a memory location.
				3936	///
				3937	/// \headerfile <x86intrin.h>
				3938	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3939	/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3940	///
				3941	/// \param __p
				3942	/// A pointer to a 64-bit memory location that will receive the lower 64 bits
				3943	/// of the integer vector parameter.
				3944	/// \param __a
				3945	/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the
				3946	/// value to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3947	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3948	_mm_storel_epi64(__m128i *__p, __m128i __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3949	{
Chad Rosier	87622b8	2012-05-01 18:11:51 +0000	[diff] [blame]	3950	struct __mm_storel_epi64_struct {
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3951	long long __u;
Chad Rosier	87622b8	2012-05-01 18:11:51 +0000	[diff] [blame]	3952	} __attribute__((__packed__, __may_alias__));
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3953	((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3954	}
				3955
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3956	/// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit
				3957	/// aligned memory location. To minimize caching, the data is flagged as
				3958	/// non-temporal (unlikely to be used again soon).
				3959	///
				3960	/// \headerfile <x86intrin.h>
				3961	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3962	/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3963	///
				3964	/// \param __p
				3965	/// A pointer to the 128-bit aligned memory location used to store the value.
				3966	/// \param __a
				3967	/// A vector of [2 x double] containing the 64-bit values to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3968	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3969	_mm_stream_pd(double *__p, __m128d __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3970	{
Simon Pilgrim	beca5f2	2016-06-13 09:57:52 +0000	[diff] [blame]	3971	__builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3972	}
				3973
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3974	/// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location.
				3975	/// To minimize caching, the data is flagged as non-temporal (unlikely to be
				3976	/// used again soon).
				3977	///
				3978	/// \headerfile <x86intrin.h>
				3979	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3980	/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3981	///
				3982	/// \param __p
				3983	/// A pointer to the 128-bit aligned memory location used to store the value.
				3984	/// \param __a
				3985	/// A 128-bit integer vector containing the values to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	3986	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	3987	_mm_stream_si128(__m128i *__p, __m128i __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3988	{
Simon Pilgrim	beca5f2	2016-06-13 09:57:52 +0000	[diff] [blame]	3989	__builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	3990	}
				3991
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3992	/// \brief Stores a 32-bit integer value in the specified memory location. To
				3993	/// minimize caching, the data is flagged as non-temporal (unlikely to be
				3994	/// used again soon).
				3995	///
				3996	/// \headerfile <x86intrin.h>
				3997	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	3998	/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	3999	///
				4000	/// \param __p
				4001	/// A pointer to the 32-bit memory location used to store the value.
				4002	/// \param __a
				4003	/// A 32-bit integer containing the value to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4004	static __inline__ void __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4005	_mm_stream_si32(int *__p, int __a)
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	4006	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4007	__builtin_ia32_movnti(__p, __a);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	4008	}
				4009
Eli Friedman	f9d8c6c	2013-09-23 23:38:39 +0000	[diff] [blame]	4010	#ifdef __x86_64__
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4011	/// \brief Stores a 64-bit integer value in the specified memory location. To
				4012	/// minimize caching, the data is flagged as non-temporal (unlikely to be
				4013	/// used again soon).
				4014	///
				4015	/// \headerfile <x86intrin.h>
				4016	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4017	/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4018	///
				4019	/// \param __p
				4020	/// A pointer to the 64-bit memory location used to store the value.
				4021	/// \param __a
				4022	/// A 64-bit integer containing the value to be stored.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4023	static __inline__ void __DEFAULT_FN_ATTRS
Eli Friedman	f9d8c6c	2013-09-23 23:38:39 +0000	[diff] [blame]	4024	_mm_stream_si64(long long *__p, long long __a)
				4025	{
				4026	__builtin_ia32_movnti64(__p, __a);
				4027	}
				4028	#endif
				4029
Albert Gutowski	727ab8a	2016-09-14 21:19:43 +0000	[diff] [blame]	4030	#if defined(__cplusplus)
				4031	extern "C" {
				4032	#endif
				4033
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4034	/// \brief The cache line containing \a __p is flushed and invalidated from all
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4035	/// caches in the coherency domain.
				4036	///
				4037	/// \headerfile <x86intrin.h>
				4038	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4039	/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4040	///
				4041	/// \param __p
				4042	/// A pointer to the memory location used to identify the cache line to be
				4043	/// flushed.
Ekaterina Romanova	2e041c9	2017-01-13 01:14:08 +0000	[diff] [blame^]	4044	void _mm_clflush(void const * __p);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	4045
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4046	/// \brief Forces strong memory ordering (serialization) between load
				4047	/// instructions preceding this instruction and load instructions following
				4048	/// this instruction, ensuring the system completes all previous loads before
				4049	/// executing subsequent loads.
				4050	///
				4051	/// \headerfile <x86intrin.h>
				4052	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4053	/// This intrinsic corresponds to the <c> LFENCE </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4054	///
Albert Gutowski	727ab8a	2016-09-14 21:19:43 +0000	[diff] [blame]	4055	void _mm_lfence(void);
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	4056
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4057	/// \brief Forces strong memory ordering (serialization) between load and store
				4058	/// instructions preceding this instruction and load and store instructions
				4059	/// following this instruction, ensuring that the system completes all
				4060	/// previous memory accesses before executing subsequent memory accesses.
				4061	///
				4062	/// \headerfile <x86intrin.h>
				4063	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4064	/// This intrinsic corresponds to the <c> MFENCE </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4065	///
Albert Gutowski	727ab8a	2016-09-14 21:19:43 +0000	[diff] [blame]	4066	void _mm_mfence(void);
				4067
				4068	#if defined(__cplusplus)
				4069	} // extern "C"
				4070	#endif
Anders Carlsson	a0d5ca2	2008-12-25 23:48:58 +0000	[diff] [blame]	4071
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4072	/// \brief Converts 16-bit signed integers from both 128-bit integer vector
				4073	/// operands into 8-bit signed integers, and packs the results into the
				4074	/// destination. Positive values greater than 0x7F are saturated to 0x7F.
				4075	/// Negative values less than 0x80 are saturated to 0x80.
				4076	///
				4077	/// \headerfile <x86intrin.h>
				4078	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4079	/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4080	///
				4081	/// \param __a
				4082	/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
				4083	/// a signed integer and is converted to an 8-bit signed integer with
				4084	/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less
				4085	/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are
				4086	/// written to the lower 64 bits of the result.
				4087	/// \param __b
				4088	/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
				4089	/// a signed integer and is converted to an 8-bit signed integer with
				4090	/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less
				4091	/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are
				4092	/// written to the higher 64 bits of the result.
				4093	/// \returns A 128-bit vector of [16 x i8] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4094	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4095	_mm_packs_epi16(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4096	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4097	return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4098	}
				4099
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4100	/// \brief Converts 32-bit signed integers from both 128-bit integer vector
				4101	/// operands into 16-bit signed integers, and packs the results into the
				4102	/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.
				4103	/// Negative values less than 0x8000 are saturated to 0x8000.
				4104	///
				4105	/// \headerfile <x86intrin.h>
				4106	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4107	/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4108	///
				4109	/// \param __a
				4110	/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
				4111	/// a signed integer and is converted to a 16-bit signed integer with
				4112	/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
				4113	/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
				4114	/// are written to the lower 64 bits of the result.
				4115	/// \param __b
				4116	/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
				4117	/// a signed integer and is converted to a 16-bit signed integer with
				4118	/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
				4119	/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
				4120	/// are written to the higher 64 bits of the result.
				4121	/// \returns A 128-bit vector of [8 x i16] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4122	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4123	_mm_packs_epi32(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4124	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4125	return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4126	}
				4127
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4128	/// \brief Converts 16-bit signed integers from both 128-bit integer vector
				4129	/// operands into 8-bit unsigned integers, and packs the results into the
				4130	/// destination. Values greater than 0xFF are saturated to 0xFF. Values less
				4131	/// than 0x00 are saturated to 0x00.
				4132	///
				4133	/// \headerfile <x86intrin.h>
				4134	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4135	/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4136	///
				4137	/// \param __a
				4138	/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
				4139	/// a signed integer and is converted to an 8-bit unsigned integer with
				4140	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
				4141	/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are
				4142	/// written to the lower 64 bits of the result.
				4143	/// \param __b
				4144	/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
				4145	/// a signed integer and is converted to an 8-bit unsigned integer with
				4146	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
				4147	/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are
				4148	/// written to the higher 64 bits of the result.
				4149	/// \returns A 128-bit vector of [16 x i8] containing the converted values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4150	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4151	_mm_packus_epi16(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4152	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4153	return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4154	}
				4155
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4156	/// \brief Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
				4157	/// the immediate-value parameter as a selector.
				4158	///
				4159	/// \headerfile <x86intrin.h>
				4160	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4161	/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4162	///
				4163	/// \param __a
				4164	/// A 128-bit integer vector.
				4165	/// \param __imm
Ekaterina Romanova	2e041c9	2017-01-13 01:14:08 +0000	[diff] [blame^]	4166	/// An immediate value. Bits [2:0] select values from \a __a to be assigned
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4167	/// to bits [15:0] of the result. \n
				4168	/// 000: assign values from bits [15:0] of \a __a. \n
				4169	/// 001: assign values from bits [31:16] of \a __a. \n
				4170	/// 010: assign values from bits [47:32] of \a __a. \n
				4171	/// 011: assign values from bits [63:48] of \a __a. \n
				4172	/// 100: assign values from bits [79:64] of \a __a. \n
				4173	/// 101: assign values from bits [95:80] of \a __a. \n
				4174	/// 110: assign values from bits [111:96] of \a __a. \n
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4175	/// 111: assign values from bits [127:112] of \a __a.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4176	/// \returns An integer, whose lower 16 bits are selected from the 128-bit
				4177	/// integer vector parameter and the remaining bits are assigned zeros.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4178	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4179	_mm_extract_epi16(__m128i __a, int __imm)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4180	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4181	__v8hi __b = (__v8hi)__a;
Manman Ren	be38b9e	2013-10-22 19:24:42 +0000	[diff] [blame]	4182	return (unsigned short)__b[__imm & 7];
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4183	}
				4184
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4185	/// \brief Constructs a 128-bit integer vector by first making a copy of the
				4186	/// 128-bit integer vector parameter, and then inserting the lower 16 bits
				4187	/// of an integer parameter into an offset specified by the immediate-value
				4188	/// parameter.
				4189	///
				4190	/// \headerfile <x86intrin.h>
				4191	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4192	/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4193	///
				4194	/// \param __a
				4195	/// A 128-bit integer vector of [8 x i16]. This vector is copied to the
				4196	/// result and then one of the eight elements in the result is replaced by
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4197	/// the lower 16 bits of \a __b.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4198	/// \param __b
				4199	/// An integer. The lower 16 bits of this parameter are written to the
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4200	/// result beginning at an offset specified by \a __imm.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4201	/// \param __imm
				4202	/// An immediate value. Bits [2:0] select the 16-bit element of the result
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4203	/// that is replaced by the lower 16 bits of \a __b.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4204	/// \returns A 128-bit integer vector containing the constructed values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4205	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4206	_mm_insert_epi16(__m128i __a, int __b, int __imm)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4207	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4208	__v8hi __c = (__v8hi)__a;
				4209	__c[__imm & 7] = __b;
				4210	return (__m128i)__c;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4211	}
				4212
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4213	/// \brief Copies the values of the most significant bits from each 8-bit
				4214	/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
				4215	/// value, zero-extends the value, and writes it to the destination.
				4216	///
				4217	/// \headerfile <x86intrin.h>
				4218	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4219	/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4220	///
				4221	/// \param __a
				4222	/// A 128-bit integer vector containing the values with bits to be extracted.
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4223	/// \returns The most significant bits from each 8-bit element in \a __a,
				4224	/// written to bits [15:0]. The other bits are assigned zeros.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4225	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4226	_mm_movemask_epi8(__m128i __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4227	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4228	return __builtin_ia32_pmovmskb128((__v16qi)__a);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4229	}
				4230
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4231	/// \brief Constructs a 128-bit integer vector by shuffling four 32-bit
				4232	/// elements of a 128-bit integer vector parameter, using the immediate-value
				4233	/// parameter as a specifier.
				4234	///
				4235	/// \headerfile <x86intrin.h>
				4236	///
				4237	/// \code
				4238	/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
				4239	/// \endcode
				4240	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4241	/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4242	///
				4243	/// \param a
				4244	/// A 128-bit integer vector containing the values to be copied.
				4245	/// \param imm
				4246	/// An immediate value containing an 8-bit value specifying which elements to
				4247	/// copy from a. The destinations within the 128-bit destination are assigned
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4248	/// values as follows: \n
				4249	/// Bits [1:0] are used to assign values to bits [31:0] of the result. \n
				4250	/// Bits [3:2] are used to assign values to bits [63:32] of the result. \n
				4251	/// Bits [5:4] are used to assign values to bits [95:64] of the result. \n
				4252	/// Bits [7:6] are used to assign values to bits [127:96] of the result. \n
				4253	/// Bit value assignments: \n
				4254	/// 00: assign values from bits [31:0] of \a a. \n
				4255	/// 01: assign values from bits [63:32] of \a a. \n
				4256	/// 10: assign values from bits [95:64] of \a a. \n
				4257	/// 11: assign values from bits [127:96] of \a a.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4258	/// \returns A 128-bit integer vector containing the shuffled values.
Bob Wilson	c9b97cc	2011-11-05 06:08:06 +0000	[diff] [blame]	4259	#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
Craig Topper	51e4741	2015-02-13 06:04:43 +0000	[diff] [blame]	4260	(__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
Craig Topper	2a383c9	2016-07-04 22:18:01 +0000	[diff] [blame]	4261	(__v4si)_mm_undefined_si128(), \
				4262	((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
				4263	((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); })
Chris Lattner	f03406f	2011-04-25 20:42:40 +0000	[diff] [blame]	4264
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4265	/// \brief Constructs a 128-bit integer vector by shuffling four lower 16-bit
				4266	/// elements of a 128-bit integer vector of [8 x i16], using the immediate
				4267	/// value parameter as a specifier.
				4268	///
				4269	/// \headerfile <x86intrin.h>
				4270	///
				4271	/// \code
				4272	/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
				4273	/// \endcode
				4274	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4275	/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4276	///
				4277	/// \param a
				4278	/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
				4279	/// [127:64] of the result.
				4280	/// \param imm
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4281	/// An 8-bit immediate value specifying which elements to copy from \a a. \n
				4282	/// Bits [1:0] are used to assign values to bits [15:0] of the result. \n
				4283	/// Bits [3:2] are used to assign values to bits [31:16] of the result. \n
				4284	/// Bits [5:4] are used to assign values to bits [47:32] of the result. \n
				4285	/// Bits [7:6] are used to assign values to bits [63:48] of the result. \n
				4286	/// Bit value assignments: \n
				4287	/// 00: assign values from bits [15:0] of \a a. \n
				4288	/// 01: assign values from bits [31:16] of \a a. \n
				4289	/// 10: assign values from bits [47:32] of \a a. \n
				4290	/// 11: assign values from bits [63:48] of \a a.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4291	/// \returns A 128-bit integer vector containing the shuffled values.
Bob Wilson	c9b97cc	2011-11-05 06:08:06 +0000	[diff] [blame]	4292	#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
Craig Topper	51e4741	2015-02-13 06:04:43 +0000	[diff] [blame]	4293	(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
Craig Topper	2a383c9	2016-07-04 22:18:01 +0000	[diff] [blame]	4294	(__v8hi)_mm_undefined_si128(), \
				4295	((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
				4296	((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \
Bob Wilson	c9b97cc	2011-11-05 06:08:06 +0000	[diff] [blame]	4297	4, 5, 6, 7); })
Chris Lattner	f03406f	2011-04-25 20:42:40 +0000	[diff] [blame]	4298
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4299	/// \brief Constructs a 128-bit integer vector by shuffling four upper 16-bit
				4300	/// elements of a 128-bit integer vector of [8 x i16], using the immediate
				4301	/// value parameter as a specifier.
				4302	///
				4303	/// \headerfile <x86intrin.h>
				4304	///
				4305	/// \code
				4306	/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
				4307	/// \endcode
				4308	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4309	/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4310	///
				4311	/// \param a
				4312	/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
				4313	/// [63:0] of the result.
				4314	/// \param imm
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4315	/// An 8-bit immediate value specifying which elements to copy from \a a. \n
				4316	/// Bits [1:0] are used to assign values to bits [79:64] of the result. \n
				4317	/// Bits [3:2] are used to assign values to bits [95:80] of the result. \n
				4318	/// Bits [5:4] are used to assign values to bits [111:96] of the result. \n
				4319	/// Bits [7:6] are used to assign values to bits [127:112] of the result. \n
				4320	/// Bit value assignments: \n
				4321	/// 00: assign values from bits [79:64] of \a a. \n
				4322	/// 01: assign values from bits [95:80] of \a a. \n
				4323	/// 10: assign values from bits [111:96] of \a a. \n
				4324	/// 11: assign values from bits [127:112] of \a a.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4325	/// \returns A 128-bit integer vector containing the shuffled values.
Bob Wilson	c9b97cc	2011-11-05 06:08:06 +0000	[diff] [blame]	4326	#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
Craig Topper	51e4741	2015-02-13 06:04:43 +0000	[diff] [blame]	4327	(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
Craig Topper	2a383c9	2016-07-04 22:18:01 +0000	[diff] [blame]	4328	(__v8hi)_mm_undefined_si128(), \
Bob Wilson	c9b97cc	2011-11-05 06:08:06 +0000	[diff] [blame]	4329	0, 1, 2, 3, \
Craig Topper	2a383c9	2016-07-04 22:18:01 +0000	[diff] [blame]	4330	4 + (((imm) >> 0) & 0x3), \
				4331	4 + (((imm) >> 2) & 0x3), \
				4332	4 + (((imm) >> 4) & 0x3), \
				4333	4 + (((imm) >> 6) & 0x3)); })
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4334
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4335	/// \brief Unpacks the high-order (index 8-15) values from two 128-bit vectors
				4336	/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
				4337	///
				4338	/// \headerfile <x86intrin.h>
				4339	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4340	/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
				4341	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4342	///
				4343	/// \param __a
				4344	/// A 128-bit vector of [16 x i8].
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4345	/// Bits [71:64] are written to bits [7:0] of the result. \n
				4346	/// Bits [79:72] are written to bits [23:16] of the result. \n
				4347	/// Bits [87:80] are written to bits [39:32] of the result. \n
				4348	/// Bits [95:88] are written to bits [55:48] of the result. \n
				4349	/// Bits [103:96] are written to bits [71:64] of the result. \n
				4350	/// Bits [111:104] are written to bits [87:80] of the result. \n
				4351	/// Bits [119:112] are written to bits [103:96] of the result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4352	/// Bits [127:120] are written to bits [119:112] of the result.
				4353	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4354	/// A 128-bit vector of [16 x i8]. \n
				4355	/// Bits [71:64] are written to bits [15:8] of the result. \n
				4356	/// Bits [79:72] are written to bits [31:24] of the result. \n
				4357	/// Bits [87:80] are written to bits [47:40] of the result. \n
				4358	/// Bits [95:88] are written to bits [63:56] of the result. \n
				4359	/// Bits [103:96] are written to bits [79:72] of the result. \n
				4360	/// Bits [111:104] are written to bits [95:88] of the result. \n
				4361	/// Bits [119:112] are written to bits [111:104] of the result. \n
				4362	/// Bits [127:120] are written to bits [127:120] of the result.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4363	/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4364	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4365	_mm_unpackhi_epi8(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4366	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4367	return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4368	}
				4369
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4370	/// \brief Unpacks the high-order (index 4-7) values from two 128-bit vectors of
				4371	/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
				4372	///
				4373	/// \headerfile <x86intrin.h>
				4374	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4375	/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>
				4376	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4377	///
				4378	/// \param __a
				4379	/// A 128-bit vector of [8 x i16].
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4380	/// Bits [79:64] are written to bits [15:0] of the result. \n
				4381	/// Bits [95:80] are written to bits [47:32] of the result. \n
				4382	/// Bits [111:96] are written to bits [79:64] of the result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4383	/// Bits [127:112] are written to bits [111:96] of the result.
				4384	/// \param __b
				4385	/// A 128-bit vector of [8 x i16].
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4386	/// Bits [79:64] are written to bits [31:16] of the result. \n
				4387	/// Bits [95:80] are written to bits [63:48] of the result. \n
				4388	/// Bits [111:96] are written to bits [95:80] of the result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4389	/// Bits [127:112] are written to bits [127:112] of the result.
				4390	/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4391	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4392	_mm_unpackhi_epi16(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4393	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4394	return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4395	}
				4396
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4397	/// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of
				4398	/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
				4399	///
				4400	/// \headerfile <x86intrin.h>
				4401	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4402	/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>
				4403	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4404	///
				4405	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4406	/// A 128-bit vector of [4 x i32]. \n
				4407	/// Bits [95:64] are written to bits [31:0] of the destination. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4408	/// Bits [127:96] are written to bits [95:64] of the destination.
				4409	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4410	/// A 128-bit vector of [4 x i32]. \n
				4411	/// Bits [95:64] are written to bits [63:32] of the destination. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4412	/// Bits [127:96] are written to bits [127:96] of the destination.
				4413	/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4414	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4415	_mm_unpackhi_epi32(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4416	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4417	return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4418	}
				4419
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4420	/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
				4421	/// of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
				4422	///
				4423	/// \headerfile <x86intrin.h>
				4424	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4425	/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>
				4426	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4427	///
				4428	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4429	/// A 128-bit vector of [2 x i64]. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4430	/// Bits [127:64] are written to bits [63:0] of the destination.
				4431	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4432	/// A 128-bit vector of [2 x i64]. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4433	/// Bits [127:64] are written to bits [127:64] of the destination.
				4434	/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4435	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4436	_mm_unpackhi_epi64(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4437	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	4438	return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4439	}
				4440
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4441	/// \brief Unpacks the low-order (index 0-7) values from two 128-bit vectors of
				4442	/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
				4443	///
				4444	/// \headerfile <x86intrin.h>
				4445	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4446	/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
				4447	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4448	///
				4449	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4450	/// A 128-bit vector of [16 x i8]. \n
				4451	/// Bits [7:0] are written to bits [7:0] of the result. \n
				4452	/// Bits [15:8] are written to bits [23:16] of the result. \n
				4453	/// Bits [23:16] are written to bits [39:32] of the result. \n
				4454	/// Bits [31:24] are written to bits [55:48] of the result. \n
				4455	/// Bits [39:32] are written to bits [71:64] of the result. \n
				4456	/// Bits [47:40] are written to bits [87:80] of the result. \n
				4457	/// Bits [55:48] are written to bits [103:96] of the result. \n
				4458	/// Bits [63:56] are written to bits [119:112] of the result.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4459	/// \param __b
				4460	/// A 128-bit vector of [16 x i8].
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4461	/// Bits [7:0] are written to bits [15:8] of the result. \n
				4462	/// Bits [15:8] are written to bits [31:24] of the result. \n
				4463	/// Bits [23:16] are written to bits [47:40] of the result. \n
				4464	/// Bits [31:24] are written to bits [63:56] of the result. \n
				4465	/// Bits [39:32] are written to bits [79:72] of the result. \n
				4466	/// Bits [47:40] are written to bits [95:88] of the result. \n
				4467	/// Bits [55:48] are written to bits [111:104] of the result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4468	/// Bits [63:56] are written to bits [127:120] of the result.
				4469	/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4470	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4471	_mm_unpacklo_epi8(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4472	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4473	return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4474	}
				4475
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4476	/// \brief Unpacks the low-order (index 0-3) values from each of the two 128-bit
				4477	/// vectors of [8 x i16] and interleaves them into a 128-bit vector of
				4478	/// [8 x i16].
				4479	///
				4480	/// \headerfile <x86intrin.h>
				4481	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4482	/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>
				4483	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4484	///
				4485	/// \param __a
				4486	/// A 128-bit vector of [8 x i16].
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4487	/// Bits [15:0] are written to bits [15:0] of the result. \n
				4488	/// Bits [31:16] are written to bits [47:32] of the result. \n
				4489	/// Bits [47:32] are written to bits [79:64] of the result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4490	/// Bits [63:48] are written to bits [111:96] of the result.
				4491	/// \param __b
				4492	/// A 128-bit vector of [8 x i16].
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4493	/// Bits [15:0] are written to bits [31:16] of the result. \n
				4494	/// Bits [31:16] are written to bits [63:48] of the result. \n
				4495	/// Bits [47:32] are written to bits [95:80] of the result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4496	/// Bits [63:48] are written to bits [127:112] of the result.
				4497	/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4498	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4499	_mm_unpacklo_epi16(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4500	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4501	return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4502	}
				4503
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4504	/// \brief Unpacks the low-order (index 0,1) values from two 128-bit vectors of
				4505	/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
				4506	///
				4507	/// \headerfile <x86intrin.h>
				4508	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4509	/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>
				4510	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4511	///
				4512	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4513	/// A 128-bit vector of [4 x i32]. \n
				4514	/// Bits [31:0] are written to bits [31:0] of the destination. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4515	/// Bits [63:32] are written to bits [95:64] of the destination.
				4516	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4517	/// A 128-bit vector of [4 x i32]. \n
				4518	/// Bits [31:0] are written to bits [64:32] of the destination. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4519	/// Bits [63:32] are written to bits [127:96] of the destination.
				4520	/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4521	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4522	_mm_unpacklo_epi32(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4523	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4524	return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4525	}
				4526
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4527	/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors of
				4528	/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
				4529	///
				4530	/// \headerfile <x86intrin.h>
				4531	///
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4532	/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
				4533	/// instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4534	///
				4535	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4536	/// A 128-bit vector of [2 x i64]. \n
				4537	/// Bits [63:0] are written to bits [63:0] of the destination. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4538	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4539	/// A 128-bit vector of [2 x i64]. \n
				4540	/// Bits [63:0] are written to bits [127:64] of the destination. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4541	/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4542	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4543	_mm_unpacklo_epi64(__m128i __a, __m128i __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4544	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	4545	return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4546	}
				4547
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4548	/// \brief Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
Ekaterina Romanova	493091f	2016-10-20 17:59:15 +0000	[diff] [blame]	4549	/// integer.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4550	///
				4551	/// \headerfile <x86intrin.h>
				4552	///
				4553	/// This intrinsic has no corresponding instruction.
				4554	///
				4555	/// \param __a
				4556	/// A 128-bit integer vector operand. The lower 64 bits are moved to the
				4557	/// destination.
				4558	/// \returns A 64-bit integer containing the lower 64 bits of the parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4559	static __inline__ __m64 __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4560	_mm_movepi64_pi64(__m128i __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4561	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4562	return (__m64)__a[0];
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4563	}
				4564
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4565	/// \brief Moves the 64-bit operand to a 128-bit integer vector, zeroing the
				4566	/// upper bits.
				4567	///
				4568	/// \headerfile <x86intrin.h>
				4569	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4570	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ / MOVD </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4571	///
				4572	/// \param __a
				4573	/// A 64-bit value.
				4574	/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
				4575	/// the operand. The upper 64 bits are assigned zeros.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4576	static __inline__ __m128i __DEFAULT_FN_ATTRS
Alp Toker	d480b1b	2013-11-23 22:11:57 +0000	[diff] [blame]	4577	_mm_movpi64_epi64(__m64 __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4578	{
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4579	return (__m128i){ (long long)__a, 0 };
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4580	}
				4581
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4582	/// \brief Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
				4583	/// integer vector, zeroing the upper bits.
				4584	///
				4585	/// \headerfile <x86intrin.h>
				4586	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4587	/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4588	///
				4589	/// \param __a
				4590	/// A 128-bit integer vector operand. The lower 64 bits are moved to the
				4591	/// destination.
				4592	/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
				4593	/// the operand. The upper 64 bits are assigned zeros.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4594	static __inline__ __m128i __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4595	_mm_move_epi64(__m128i __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4596	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	4597	return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4598	}
				4599
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4600	/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
				4601	/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
				4602	/// double].
				4603	///
				4604	/// \headerfile <x86intrin.h>
				4605	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4606	/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4607	///
				4608	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4609	/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4610	/// Bits [127:64] are written to bits [63:0] of the destination.
				4611	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4612	/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4613	/// Bits [127:64] are written to bits [127:64] of the destination.
				4614	/// \returns A 128-bit vector of [2 x double] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4615	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4616	_mm_unpackhi_pd(__m128d __a, __m128d __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4617	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	4618	return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4619	}
				4620
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4621	/// \brief Unpacks the low-order (even-indexed) values from two 128-bit vectors
				4622	/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
				4623	/// double].
				4624	///
				4625	/// \headerfile <x86intrin.h>
				4626	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4627	/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4628	///
				4629	/// \param __a
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4630	/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4631	/// Bits [63:0] are written to bits [63:0] of the destination.
				4632	/// \param __b
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4633	/// A 128-bit vector of [2 x double]. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4634	/// Bits [63:0] are written to bits [127:64] of the destination.
				4635	/// \returns A 128-bit vector of [2 x double] containing the interleaved values.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4636	static __inline__ __m128d __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4637	_mm_unpacklo_pd(__m128d __a, __m128d __b)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4638	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	4639	return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4640	}
				4641
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4642	/// \brief Extracts the sign bits of the double-precision values in the 128-bit
				4643	/// vector of [2 x double], zero-extends the value, and writes it to the
				4644	/// low-order bits of the destination.
				4645	///
				4646	/// \headerfile <x86intrin.h>
				4647	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4648	/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4649	///
				4650	/// \param __a
				4651	/// A 128-bit vector of [2 x double] containing the values with sign bits to
				4652	/// be extracted.
Ekaterina Romanova	797b0eb	2016-12-08 22:10:51 +0000	[diff] [blame]	4653	/// \returns The sign bits from each of the double-precision elements in \a __a,
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4654	/// written to bits [1:0]. The remaining bits are assigned values of zero.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4655	static __inline__ int __DEFAULT_FN_ATTRS
David Blaikie	3302f2b	2013-01-16 23:08:36 +0000	[diff] [blame]	4656	_mm_movemask_pd(__m128d __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4657	{
Craig Topper	1aa231e	2016-05-16 06:38:42 +0000	[diff] [blame]	4658	return __builtin_ia32_movmskpd((__v2df)__a);
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4659	}
				4660
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4661
				4662	/// \brief Constructs a 128-bit floating-point vector of [2 x double] from two
				4663	/// 128-bit vector parameters of [2 x double], using the immediate-value
				4664	/// parameter as a specifier.
				4665	///
				4666	/// \headerfile <x86intrin.h>
				4667	///
				4668	/// \code
				4669	/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);
				4670	/// \endcode
				4671	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	4672	/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4673	///
				4674	/// \param a
				4675	/// A 128-bit vector of [2 x double].
				4676	/// \param b
				4677	/// A 128-bit vector of [2 x double].
				4678	/// \param i
				4679	/// An 8-bit immediate value. The least significant two bits specify which
Ekaterina Romanova	dffe45b	2016-12-27 00:49:38 +0000	[diff] [blame]	4680	/// elements to copy from a and b: \n
				4681	/// Bit[0] = 0: lower element of a copied to lower element of result. \n
				4682	/// Bit[0] = 1: upper element of a copied to lower element of result. \n
				4683	/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
				4684	/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4685	/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
Bob Wilson	c9b97cc	2011-11-05 06:08:06 +0000	[diff] [blame]	4686	#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
Craig Topper	d619eaaa	2015-11-11 03:47:10 +0000	[diff] [blame]	4687	(__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
Craig Topper	2a383c9	2016-07-04 22:18:01 +0000	[diff] [blame]	4688	0 + (((i) >> 0) & 0x1), \
				4689	2 + (((i) >> 1) & 0x1)); })
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4690
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4691	/// \brief Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
				4692	/// floating-point vector of [4 x float].
				4693	///
				4694	/// \headerfile <x86intrin.h>
				4695	///
				4696	/// This intrinsic has no corresponding instruction.
				4697	///
				4698	/// \param __a
				4699	/// A 128-bit floating-point vector of [2 x double].
				4700	/// \returns A 128-bit floating-point vector of [4 x float] containing the same
				4701	/// bitwise pattern as the parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4702	static __inline__ __m128 __DEFAULT_FN_ATTRS
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4703	_mm_castpd_ps(__m128d __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4704	{
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4705	return (__m128)__a;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4706	}
				4707
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4708	/// \brief Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
				4709	/// integer vector.
				4710	///
				4711	/// \headerfile <x86intrin.h>
				4712	///
				4713	/// This intrinsic has no corresponding instruction.
				4714	///
				4715	/// \param __a
				4716	/// A 128-bit floating-point vector of [2 x double].
				4717	/// \returns A 128-bit integer vector containing the same bitwise pattern as the
				4718	/// parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4719	static __inline__ __m128i __DEFAULT_FN_ATTRS
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4720	_mm_castpd_si128(__m128d __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4721	{
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4722	return (__m128i)__a;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4723	}
				4724
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4725	/// \brief Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
				4726	/// floating-point vector of [2 x double].
				4727	///
				4728	/// \headerfile <x86intrin.h>
				4729	///
				4730	/// This intrinsic has no corresponding instruction.
				4731	///
				4732	/// \param __a
				4733	/// A 128-bit floating-point vector of [4 x float].
				4734	/// \returns A 128-bit floating-point vector of [2 x double] containing the same
				4735	/// bitwise pattern as the parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4736	static __inline__ __m128d __DEFAULT_FN_ATTRS
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4737	_mm_castps_pd(__m128 __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4738	{
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4739	return (__m128d)__a;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4740	}
				4741
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4742	/// \brief Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
				4743	/// integer vector.
				4744	///
				4745	/// \headerfile <x86intrin.h>
				4746	///
				4747	/// This intrinsic has no corresponding instruction.
				4748	///
				4749	/// \param __a
				4750	/// A 128-bit floating-point vector of [4 x float].
				4751	/// \returns A 128-bit integer vector containing the same bitwise pattern as the
				4752	/// parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4753	static __inline__ __m128i __DEFAULT_FN_ATTRS
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4754	_mm_castps_si128(__m128 __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4755	{
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4756	return (__m128i)__a;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4757	}
				4758
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4759	/// \brief Casts a 128-bit integer vector into a 128-bit floating-point vector
				4760	/// of [4 x float].
				4761	///
				4762	/// \headerfile <x86intrin.h>
				4763	///
				4764	/// This intrinsic has no corresponding instruction.
				4765	///
				4766	/// \param __a
				4767	/// A 128-bit integer vector.
				4768	/// \returns A 128-bit floating-point vector of [4 x float] containing the same
				4769	/// bitwise pattern as the parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4770	static __inline__ __m128 __DEFAULT_FN_ATTRS
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4771	_mm_castsi128_ps(__m128i __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4772	{
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4773	return (__m128)__a;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4774	}
				4775
Ekaterina Romanova	a84c24f	2016-07-22 23:49:37 +0000	[diff] [blame]	4776	/// \brief Casts a 128-bit integer vector into a 128-bit floating-point vector
				4777	/// of [2 x double].
				4778	///
				4779	/// \headerfile <x86intrin.h>
				4780	///
				4781	/// This intrinsic has no corresponding instruction.
				4782	///
				4783	/// \param __a
				4784	/// A 128-bit integer vector.
				4785	/// \returns A 128-bit floating-point vector of [2 x double] containing the same
				4786	/// bitwise pattern as the parameter.
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4787	static __inline__ __m128d __DEFAULT_FN_ATTRS
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4788	_mm_castsi128_pd(__m128i __a)
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4789	{
Reid Kleckner	7ab75b3	2013-04-19 17:00:14 +0000	[diff] [blame]	4790	return (__m128d)__a;
Anders Carlsson	85eb124	2008-12-26 00:45:50 +0000	[diff] [blame]	4791	}
				4792
#ifdef __cplusplus
extern "C" {
#endif

/// \brief Signals that the caller is executing a spin loop, so that power
///    consumption can be optimized while the loop runs.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PAUSE </c> instruction.
///
void _mm_pause(void);

#ifdef __cplusplus
} /* extern "C" */
#endif
Michael Kuperstein	e45af54	2015-06-30 13:36:19 +0000	[diff] [blame]	4809	#undef __DEFAULT_FN_ATTRS
Eric Christopher	4d185168	2015-06-17 07:09:20 +0000	[diff] [blame]	4810
/// \brief Packs two single-bit element selectors into a 2-bit immediate:
///    \a x is placed in bit 1 and \a y in bit 0. This matches the layout of
///    the two-bit selector read by _mm_shuffle_pd (bit 1 chooses the element
///    taken from its second operand, bit 0 the element taken from its first).
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
Anders Carlsson	37c2371	2008-12-26 00:49:43 +0000	[diff] [blame]	4812
Anders Carlsson	f15e71d	2008-12-24 01:45:22 +0000	[diff] [blame]	4813	#endif /* __EMMINTRIN_H */