Blame - third_party/llvm-build/Release+Asserts/lib/clang/3.9.0/include/emmintrin.h - fp2-dev/platform/external/v8

blob: aba2438ca0a427306d09c652979f9a83cedebefe [file] [log] [blame]

Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1	/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
				2	*
				3	* Permission is hereby granted, free of charge, to any person obtaining a copy
				4	* of this software and associated documentation files (the "Software"), to deal
				5	* in the Software without restriction, including without limitation the rights
				6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				7	* copies of the Software, and to permit persons to whom the Software is
				8	* furnished to do so, subject to the following conditions:
				9	*
				10	* The above copyright notice and this permission notice shall be included in
				11	* all copies or substantial portions of the Software.
				12	*
				13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				19	* THE SOFTWARE.
				20	*
				21	*===-----------------------------------------------------------------------===
				22	*/
				23
				24	#ifndef __EMMINTRIN_H
				25	#define __EMMINTRIN_H
				26
				27	#include <xmmintrin.h>
				28
				/* Public SSE2 vector types: 16 bytes viewed as doubles and as long longs. */
				typedef double    __m128d __attribute__((__vector_size__(16)));
				typedef long long __m128i __attribute__((__vector_size__(16)));

				/* Element-typed 16-byte vector views used for casts inside this header. */
				typedef double    __v2df  __attribute__((__vector_size__(16)));
				typedef long long __v2di  __attribute__((__vector_size__(16)));
				typedef short     __v8hi  __attribute__((__vector_size__(16)));
				typedef char      __v16qi __attribute__((__vector_size__(16)));

				/* Explicitly signed counterpart of the char vector. It is an internal
				 * helper and is not meant to show up in the public interface. */
				typedef signed char __v16qs __attribute__((__vector_size__(16)));
				41
				42	#include <f16cintrin.h>
				43
				/* Attribute set shared by every function defined in this file: always
				 * inline, no debug info, and the "sse2" target feature. */
				#define __DEFAULT_FN_ATTRS \
				  __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
				46
				47	static __inline__ __m128d __DEFAULT_FN_ATTRS
				48	_mm_add_sd(__m128d __a, __m128d __b)
				49	{
				50	__a[0] += __b[0];
				51	return __a;
				52	}
				53
				54	static __inline__ __m128d __DEFAULT_FN_ATTRS
				55	_mm_add_pd(__m128d __a, __m128d __b)
				56	{
				57	return __a + __b;
				58	}
				59
				60	static __inline__ __m128d __DEFAULT_FN_ATTRS
				61	_mm_sub_sd(__m128d __a, __m128d __b)
				62	{
				63	__a[0] -= __b[0];
				64	return __a;
				65	}
				66
				67	static __inline__ __m128d __DEFAULT_FN_ATTRS
				68	_mm_sub_pd(__m128d __a, __m128d __b)
				69	{
				70	return __a - __b;
				71	}
				72
				73	static __inline__ __m128d __DEFAULT_FN_ATTRS
				74	_mm_mul_sd(__m128d __a, __m128d __b)
				75	{
				76	__a[0] *= __b[0];
				77	return __a;
				78	}
				79
				80	static __inline__ __m128d __DEFAULT_FN_ATTRS
				81	_mm_mul_pd(__m128d __a, __m128d __b)
				82	{
				83	return __a * __b;
				84	}
				85
				86	static __inline__ __m128d __DEFAULT_FN_ATTRS
				87	_mm_div_sd(__m128d __a, __m128d __b)
				88	{
				89	__a[0] /= __b[0];
				90	return __a;
				91	}
				92
				93	static __inline__ __m128d __DEFAULT_FN_ATTRS
				94	_mm_div_pd(__m128d __a, __m128d __b)
				95	{
				96	return __a / __b;
				97	}
				98
				99	static __inline__ __m128d __DEFAULT_FN_ATTRS
				100	_mm_sqrt_sd(__m128d __a, __m128d __b)
				101	{
				102	__m128d __c = __builtin_ia32_sqrtsd(__b);
				103	return (__m128d) { __c[0], __a[1] };
				104	}
				105
				106	static __inline__ __m128d __DEFAULT_FN_ATTRS
				107	_mm_sqrt_pd(__m128d __a)
				108	{
				109	return __builtin_ia32_sqrtpd(__a);
				110	}
				111
				112	static __inline__ __m128d __DEFAULT_FN_ATTRS
				113	_mm_min_sd(__m128d __a, __m128d __b)
				114	{
				115	return __builtin_ia32_minsd(__a, __b);
				116	}
				117
				118	static __inline__ __m128d __DEFAULT_FN_ATTRS
				119	_mm_min_pd(__m128d __a, __m128d __b)
				120	{
				121	return __builtin_ia32_minpd(__a, __b);
				122	}
				123
				124	static __inline__ __m128d __DEFAULT_FN_ATTRS
				125	_mm_max_sd(__m128d __a, __m128d __b)
				126	{
				127	return __builtin_ia32_maxsd(__a, __b);
				128	}
				129
				130	static __inline__ __m128d __DEFAULT_FN_ATTRS
				131	_mm_max_pd(__m128d __a, __m128d __b)
				132	{
				133	return __builtin_ia32_maxpd(__a, __b);
				134	}
				135
				136	static __inline__ __m128d __DEFAULT_FN_ATTRS
				137	_mm_and_pd(__m128d __a, __m128d __b)
				138	{
				139	return (__m128d)((__v4si)__a & (__v4si)__b);
				140	}
				141
				142	static __inline__ __m128d __DEFAULT_FN_ATTRS
				143	_mm_andnot_pd(__m128d __a, __m128d __b)
				144	{
				145	return (__m128d)(~(__v4si)__a & (__v4si)__b);
				146	}
				147
				148	static __inline__ __m128d __DEFAULT_FN_ATTRS
				149	_mm_or_pd(__m128d __a, __m128d __b)
				150	{
				151	return (__m128d)((__v4si)__a \| (__v4si)__b);
				152	}
				153
				154	static __inline__ __m128d __DEFAULT_FN_ATTRS
				155	_mm_xor_pd(__m128d __a, __m128d __b)
				156	{
				157	return (__m128d)((__v4si)__a ^ (__v4si)__b);
				158	}
				159
				160	static __inline__ __m128d __DEFAULT_FN_ATTRS
				161	_mm_cmpeq_pd(__m128d __a, __m128d __b)
				162	{
				163	return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
				164	}
				165
				166	static __inline__ __m128d __DEFAULT_FN_ATTRS
				167	_mm_cmplt_pd(__m128d __a, __m128d __b)
				168	{
				169	return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
				170	}
				171
				172	static __inline__ __m128d __DEFAULT_FN_ATTRS
				173	_mm_cmple_pd(__m128d __a, __m128d __b)
				174	{
				175	return (__m128d)__builtin_ia32_cmplepd(__a, __b);
				176	}
				177
				178	static __inline__ __m128d __DEFAULT_FN_ATTRS
				179	_mm_cmpgt_pd(__m128d __a, __m128d __b)
				180	{
				181	return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
				182	}
				183
				184	static __inline__ __m128d __DEFAULT_FN_ATTRS
				185	_mm_cmpge_pd(__m128d __a, __m128d __b)
				186	{
				187	return (__m128d)__builtin_ia32_cmplepd(__b, __a);
				188	}
				189
				190	static __inline__ __m128d __DEFAULT_FN_ATTRS
				191	_mm_cmpord_pd(__m128d __a, __m128d __b)
				192	{
				193	return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
				194	}
				195
				196	static __inline__ __m128d __DEFAULT_FN_ATTRS
				197	_mm_cmpunord_pd(__m128d __a, __m128d __b)
				198	{
				199	return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
				200	}
				201
				202	static __inline__ __m128d __DEFAULT_FN_ATTRS
				203	_mm_cmpneq_pd(__m128d __a, __m128d __b)
				204	{
				205	return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
				206	}
				207
				208	static __inline__ __m128d __DEFAULT_FN_ATTRS
				209	_mm_cmpnlt_pd(__m128d __a, __m128d __b)
				210	{
				211	return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
				212	}
				213
				214	static __inline__ __m128d __DEFAULT_FN_ATTRS
				215	_mm_cmpnle_pd(__m128d __a, __m128d __b)
				216	{
				217	return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
				218	}
				219
				220	static __inline__ __m128d __DEFAULT_FN_ATTRS
				221	_mm_cmpngt_pd(__m128d __a, __m128d __b)
				222	{
				223	return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
				224	}
				225
				226	static __inline__ __m128d __DEFAULT_FN_ATTRS
				227	_mm_cmpnge_pd(__m128d __a, __m128d __b)
				228	{
				229	return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
				230	}
				231
				232	static __inline__ __m128d __DEFAULT_FN_ATTRS
				233	_mm_cmpeq_sd(__m128d __a, __m128d __b)
				234	{
				235	return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
				236	}
				237
				238	static __inline__ __m128d __DEFAULT_FN_ATTRS
				239	_mm_cmplt_sd(__m128d __a, __m128d __b)
				240	{
				241	return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
				242	}
				243
				244	static __inline__ __m128d __DEFAULT_FN_ATTRS
				245	_mm_cmple_sd(__m128d __a, __m128d __b)
				246	{
				247	return (__m128d)__builtin_ia32_cmplesd(__a, __b);
				248	}
				249
				250	static __inline__ __m128d __DEFAULT_FN_ATTRS
				251	_mm_cmpgt_sd(__m128d __a, __m128d __b)
				252	{
				253	__m128d __c = __builtin_ia32_cmpltsd(__b, __a);
				254	return (__m128d) { __c[0], __a[1] };
				255	}
				256
				257	static __inline__ __m128d __DEFAULT_FN_ATTRS
				258	_mm_cmpge_sd(__m128d __a, __m128d __b)
				259	{
				260	__m128d __c = __builtin_ia32_cmplesd(__b, __a);
				261	return (__m128d) { __c[0], __a[1] };
				262	}
				263
				264	static __inline__ __m128d __DEFAULT_FN_ATTRS
				265	_mm_cmpord_sd(__m128d __a, __m128d __b)
				266	{
				267	return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
				268	}
				269
				270	static __inline__ __m128d __DEFAULT_FN_ATTRS
				271	_mm_cmpunord_sd(__m128d __a, __m128d __b)
				272	{
				273	return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
				274	}
				275
				276	static __inline__ __m128d __DEFAULT_FN_ATTRS
				277	_mm_cmpneq_sd(__m128d __a, __m128d __b)
				278	{
				279	return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
				280	}
				281
				282	static __inline__ __m128d __DEFAULT_FN_ATTRS
				283	_mm_cmpnlt_sd(__m128d __a, __m128d __b)
				284	{
				285	return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
				286	}
				287
				288	static __inline__ __m128d __DEFAULT_FN_ATTRS
				289	_mm_cmpnle_sd(__m128d __a, __m128d __b)
				290	{
				291	return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
				292	}
				293
				294	static __inline__ __m128d __DEFAULT_FN_ATTRS
				295	_mm_cmpngt_sd(__m128d __a, __m128d __b)
				296	{
				297	__m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
				298	return (__m128d) { __c[0], __a[1] };
				299	}
				300
				301	static __inline__ __m128d __DEFAULT_FN_ATTRS
				302	_mm_cmpnge_sd(__m128d __a, __m128d __b)
				303	{
				304	__m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
				305	return (__m128d) { __c[0], __a[1] };
				306	}
				307
				308	static __inline__ int __DEFAULT_FN_ATTRS
				309	_mm_comieq_sd(__m128d __a, __m128d __b)
				310	{
				311	return __builtin_ia32_comisdeq(__a, __b);
				312	}
				313
				314	static __inline__ int __DEFAULT_FN_ATTRS
				315	_mm_comilt_sd(__m128d __a, __m128d __b)
				316	{
				317	return __builtin_ia32_comisdlt(__a, __b);
				318	}
				319
				320	static __inline__ int __DEFAULT_FN_ATTRS
				321	_mm_comile_sd(__m128d __a, __m128d __b)
				322	{
				323	return __builtin_ia32_comisdle(__a, __b);
				324	}
				325
				326	static __inline__ int __DEFAULT_FN_ATTRS
				327	_mm_comigt_sd(__m128d __a, __m128d __b)
				328	{
				329	return __builtin_ia32_comisdgt(__a, __b);
				330	}
				331
				332	static __inline__ int __DEFAULT_FN_ATTRS
				333	_mm_comige_sd(__m128d __a, __m128d __b)
				334	{
				335	return __builtin_ia32_comisdge(__a, __b);
				336	}
				337
				338	static __inline__ int __DEFAULT_FN_ATTRS
				339	_mm_comineq_sd(__m128d __a, __m128d __b)
				340	{
				341	return __builtin_ia32_comisdneq(__a, __b);
				342	}
				343
				344	static __inline__ int __DEFAULT_FN_ATTRS
				345	_mm_ucomieq_sd(__m128d __a, __m128d __b)
				346	{
				347	return __builtin_ia32_ucomisdeq(__a, __b);
				348	}
				349
				350	static __inline__ int __DEFAULT_FN_ATTRS
				351	_mm_ucomilt_sd(__m128d __a, __m128d __b)
				352	{
				353	return __builtin_ia32_ucomisdlt(__a, __b);
				354	}
				355
				356	static __inline__ int __DEFAULT_FN_ATTRS
				357	_mm_ucomile_sd(__m128d __a, __m128d __b)
				358	{
				359	return __builtin_ia32_ucomisdle(__a, __b);
				360	}
				361
				362	static __inline__ int __DEFAULT_FN_ATTRS
				363	_mm_ucomigt_sd(__m128d __a, __m128d __b)
				364	{
				365	return __builtin_ia32_ucomisdgt(__a, __b);
				366	}
				367
				368	static __inline__ int __DEFAULT_FN_ATTRS
				369	_mm_ucomige_sd(__m128d __a, __m128d __b)
				370	{
				371	return __builtin_ia32_ucomisdge(__a, __b);
				372	}
				373
				374	static __inline__ int __DEFAULT_FN_ATTRS
				375	_mm_ucomineq_sd(__m128d __a, __m128d __b)
				376	{
				377	return __builtin_ia32_ucomisdneq(__a, __b);
				378	}
				379
				380	static __inline__ __m128 __DEFAULT_FN_ATTRS
				381	_mm_cvtpd_ps(__m128d __a)
				382	{
				383	return __builtin_ia32_cvtpd2ps(__a);
				384	}
				385
				386	static __inline__ __m128d __DEFAULT_FN_ATTRS
				387	_mm_cvtps_pd(__m128 __a)
				388	{
				389	return __builtin_ia32_cvtps2pd(__a);
				390	}
				391
				392	static __inline__ __m128d __DEFAULT_FN_ATTRS
				393	_mm_cvtepi32_pd(__m128i __a)
				394	{
				395	return __builtin_ia32_cvtdq2pd((__v4si)__a);
				396	}
				397
				398	static __inline__ __m128i __DEFAULT_FN_ATTRS
				399	_mm_cvtpd_epi32(__m128d __a)
				400	{
				401	return __builtin_ia32_cvtpd2dq(__a);
				402	}
				403
				404	static __inline__ int __DEFAULT_FN_ATTRS
				405	_mm_cvtsd_si32(__m128d __a)
				406	{
				407	return __builtin_ia32_cvtsd2si(__a);
				408	}
				409
				410	static __inline__ __m128 __DEFAULT_FN_ATTRS
				411	_mm_cvtsd_ss(__m128 __a, __m128d __b)
				412	{
				413	__a[0] = __b[0];
				414	return __a;
				415	}
				416
				417	static __inline__ __m128d __DEFAULT_FN_ATTRS
				418	_mm_cvtsi32_sd(__m128d __a, int __b)
				419	{
				420	__a[0] = __b;
				421	return __a;
				422	}
				423
				424	static __inline__ __m128d __DEFAULT_FN_ATTRS
				425	_mm_cvtss_sd(__m128d __a, __m128 __b)
				426	{
				427	__a[0] = __b[0];
				428	return __a;
				429	}
				430
				431	static __inline__ __m128i __DEFAULT_FN_ATTRS
				432	_mm_cvttpd_epi32(__m128d __a)
				433	{
				434	return (__m128i)__builtin_ia32_cvttpd2dq(__a);
				435	}
				436
				437	static __inline__ int __DEFAULT_FN_ATTRS
				438	_mm_cvttsd_si32(__m128d __a)
				439	{
				440	return __a[0];
				441	}
				442
				443	static __inline__ __m64 __DEFAULT_FN_ATTRS
				444	_mm_cvtpd_pi32(__m128d __a)
				445	{
				446	return (__m64)__builtin_ia32_cvtpd2pi(__a);
				447	}
				448
				449	static __inline__ __m64 __DEFAULT_FN_ATTRS
				450	_mm_cvttpd_pi32(__m128d __a)
				451	{
				452	return (__m64)__builtin_ia32_cvttpd2pi(__a);
				453	}
				454
				455	static __inline__ __m128d __DEFAULT_FN_ATTRS
				456	_mm_cvtpi32_pd(__m64 __a)
				457	{
				458	return __builtin_ia32_cvtpi2pd((__v2si)__a);
				459	}
				460
				461	static __inline__ double __DEFAULT_FN_ATTRS
				462	_mm_cvtsd_f64(__m128d __a)
				463	{
				464	return __a[0];
				465	}
				466
				467	static __inline__ __m128d __DEFAULT_FN_ATTRS
				468	_mm_load_pd(double const *__dp)
				469	{
				470	return (__m128d)__dp;
				471	}
				472
				473	static __inline__ __m128d __DEFAULT_FN_ATTRS
				474	_mm_load1_pd(double const *__dp)
				475	{
				476	struct __mm_load1_pd_struct {
				477	double __u;
				478	} __attribute__((__packed__, __may_alias__));
				479	double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
				480	return (__m128d){ __u, __u };
				481	}
				482
				/* Alternate spelling that expands to _mm_load1_pd. */
				#define _mm_load_pd1(dp) _mm_load1_pd((dp))
				484
				485	static __inline__ __m128d __DEFAULT_FN_ATTRS
				486	_mm_loadr_pd(double const *__dp)
				487	{
				488	__m128d __u = (__m128d)__dp;
				489	return __builtin_shufflevector(__u, __u, 1, 0);
				490	}
				491
				492	static __inline__ __m128d __DEFAULT_FN_ATTRS
				493	_mm_loadu_pd(double const *__dp)
				494	{
				495	struct __loadu_pd {
				496	__m128d __v;
				497	} __attribute__((__packed__, __may_alias__));
				498	return ((struct __loadu_pd*)__dp)->__v;
				499	}
				500
				501	static __inline__ __m128d __DEFAULT_FN_ATTRS
				502	_mm_load_sd(double const *__dp)
				503	{
				504	struct __mm_load_sd_struct {
				505	double __u;
				506	} __attribute__((__packed__, __may_alias__));
				507	double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
				508	return (__m128d){ __u, 0 };
				509	}
				510
				511	static __inline__ __m128d __DEFAULT_FN_ATTRS
				512	_mm_loadh_pd(__m128d __a, double const *__dp)
				513	{
				514	struct __mm_loadh_pd_struct {
				515	double __u;
				516	} __attribute__((__packed__, __may_alias__));
				517	double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
				518	return (__m128d){ __a[0], __u };
				519	}
				520
				521	static __inline__ __m128d __DEFAULT_FN_ATTRS
				522	_mm_loadl_pd(__m128d __a, double const *__dp)
				523	{
				524	struct __mm_loadl_pd_struct {
				525	double __u;
				526	} __attribute__((__packed__, __may_alias__));
				527	double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
				528	return (__m128d){ __u, __a[1] };
				529	}
				530
				531	static __inline__ __m128d __DEFAULT_FN_ATTRS
				532	_mm_undefined_pd()
				533	{
				534	return (__m128d)__builtin_ia32_undef128();
				535	}
				536
				537	static __inline__ __m128d __DEFAULT_FN_ATTRS
				538	_mm_set_sd(double __w)
				539	{
				540	return (__m128d){ __w, 0 };
				541	}
				542
				543	static __inline__ __m128d __DEFAULT_FN_ATTRS
				544	_mm_set1_pd(double __w)
				545	{
				546	return (__m128d){ __w, __w };
				547	}
				548
				549	static __inline__ __m128d __DEFAULT_FN_ATTRS
				550	_mm_set_pd(double __w, double __x)
				551	{
				552	return (__m128d){ __x, __w };
				553	}
				554
				555	static __inline__ __m128d __DEFAULT_FN_ATTRS
				556	_mm_setr_pd(double __w, double __x)
				557	{
				558	return (__m128d){ __w, __x };
				559	}
				560
				561	static __inline__ __m128d __DEFAULT_FN_ATTRS
				562	_mm_setzero_pd(void)
				563	{
				564	return (__m128d){ 0, 0 };
				565	}
				566
				567	static __inline__ __m128d __DEFAULT_FN_ATTRS
				568	_mm_move_sd(__m128d __a, __m128d __b)
				569	{
				570	return (__m128d){ __b[0], __a[1] };
				571	}
				572
				573	static __inline__ void __DEFAULT_FN_ATTRS
				574	_mm_store_sd(double *__dp, __m128d __a)
				575	{
				576	struct __mm_store_sd_struct {
				577	double __u;
				578	} __attribute__((__packed__, __may_alias__));
				579	((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
				580	}
				581
				582	static __inline__ void __DEFAULT_FN_ATTRS
				583	_mm_store1_pd(double *__dp, __m128d __a)
				584	{
				585	struct __mm_store1_pd_struct {
				586	double __u[2];
				587	} __attribute__((__packed__, __may_alias__));
				588	((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
				589	((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
				590	}
				591
				592	static __inline__ void __DEFAULT_FN_ATTRS
				593	_mm_store_pd(double *__dp, __m128d __a)
				594	{
				595	(__m128d )__dp = __a;
				596	}
				597
				598	static __inline__ void __DEFAULT_FN_ATTRS
				599	_mm_storeu_pd(double *__dp, __m128d __a)
				600	{
				601	__builtin_ia32_storeupd(__dp, __a);
				602	}
				603
				604	static __inline__ void __DEFAULT_FN_ATTRS
				605	_mm_storer_pd(double *__dp, __m128d __a)
				606	{
				607	__a = __builtin_shufflevector(__a, __a, 1, 0);
				608	(__m128d )__dp = __a;
				609	}
				610
				611	static __inline__ void __DEFAULT_FN_ATTRS
				612	_mm_storeh_pd(double *__dp, __m128d __a)
				613	{
				614	struct __mm_storeh_pd_struct {
				615	double __u;
				616	} __attribute__((__packed__, __may_alias__));
				617	((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
				618	}
				619
				620	static __inline__ void __DEFAULT_FN_ATTRS
				621	_mm_storel_pd(double *__dp, __m128d __a)
				622	{
				623	struct __mm_storeh_pd_struct {
				624	double __u;
				625	} __attribute__((__packed__, __may_alias__));
				626	((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
				627	}
				628
				629	static __inline__ __m128i __DEFAULT_FN_ATTRS
				630	_mm_add_epi8(__m128i __a, __m128i __b)
				631	{
				632	return (__m128i)((__v16qi)__a + (__v16qi)__b);
				633	}
				634
				635	static __inline__ __m128i __DEFAULT_FN_ATTRS
				636	_mm_add_epi16(__m128i __a, __m128i __b)
				637	{
				638	return (__m128i)((__v8hi)__a + (__v8hi)__b);
				639	}
				640
				641	static __inline__ __m128i __DEFAULT_FN_ATTRS
				642	_mm_add_epi32(__m128i __a, __m128i __b)
				643	{
				644	return (__m128i)((__v4si)__a + (__v4si)__b);
				645	}
				646
				647	static __inline__ __m64 __DEFAULT_FN_ATTRS
				648	_mm_add_si64(__m64 __a, __m64 __b)
				649	{
				650	return (__m64)__builtin_ia32_paddq(__a, __b);
				651	}
				652
				653	static __inline__ __m128i __DEFAULT_FN_ATTRS
				654	_mm_add_epi64(__m128i __a, __m128i __b)
				655	{
				656	return __a + __b;
				657	}
				658
				659	static __inline__ __m128i __DEFAULT_FN_ATTRS
				660	_mm_adds_epi8(__m128i __a, __m128i __b)
				661	{
				662	return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
				663	}
				664
				665	static __inline__ __m128i __DEFAULT_FN_ATTRS
				666	_mm_adds_epi16(__m128i __a, __m128i __b)
				667	{
				668	return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
				669	}
				670
				671	static __inline__ __m128i __DEFAULT_FN_ATTRS
				672	_mm_adds_epu8(__m128i __a, __m128i __b)
				673	{
				674	return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
				675	}
				676
				677	static __inline__ __m128i __DEFAULT_FN_ATTRS
				678	_mm_adds_epu16(__m128i __a, __m128i __b)
				679	{
				680	return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
				681	}
				682
				683	static __inline__ __m128i __DEFAULT_FN_ATTRS
				684	_mm_avg_epu8(__m128i __a, __m128i __b)
				685	{
				686	return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
				687	}
				688
				689	static __inline__ __m128i __DEFAULT_FN_ATTRS
				690	_mm_avg_epu16(__m128i __a, __m128i __b)
				691	{
				692	return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
				693	}
				694
				695	static __inline__ __m128i __DEFAULT_FN_ATTRS
				696	_mm_madd_epi16(__m128i __a, __m128i __b)
				697	{
				698	return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
				699	}
				700
				701	static __inline__ __m128i __DEFAULT_FN_ATTRS
				702	_mm_max_epi16(__m128i __a, __m128i __b)
				703	{
				704	return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
				705	}
				706
				707	static __inline__ __m128i __DEFAULT_FN_ATTRS
				708	_mm_max_epu8(__m128i __a, __m128i __b)
				709	{
				710	return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
				711	}
				712
				713	static __inline__ __m128i __DEFAULT_FN_ATTRS
				714	_mm_min_epi16(__m128i __a, __m128i __b)
				715	{
				716	return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
				717	}
				718
				719	static __inline__ __m128i __DEFAULT_FN_ATTRS
				720	_mm_min_epu8(__m128i __a, __m128i __b)
				721	{
				722	return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
				723	}
				724
				725	static __inline__ __m128i __DEFAULT_FN_ATTRS
				726	_mm_mulhi_epi16(__m128i __a, __m128i __b)
				727	{
				728	return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
				729	}
				730
				731	static __inline__ __m128i __DEFAULT_FN_ATTRS
				732	_mm_mulhi_epu16(__m128i __a, __m128i __b)
				733	{
				734	return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
				735	}
				736
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	737	/// \brief Multiplies the corresponding elements of two [8 x short] vectors and
				738	/// returns a vector containing the low-order 16 bits of each 32-bit product
				739	/// in the corresponding element.
				740	///
				741	/// \headerfile <x86intrin.h>
				742	///
				743	/// This intrinsic corresponds to the \c VPMULLW / PMULLW instruction.
				744	///
				745	/// \param __a
				746	/// A 128-bit integer vector containing one of the source operands.
				747	/// \param __b
				748	/// A 128-bit integer vector containing one of the source operands.
				749	/// \returns A 128-bit integer vector containing the products of both operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	750	static __inline__ __m128i __DEFAULT_FN_ATTRS
				751	_mm_mullo_epi16(__m128i __a, __m128i __b)
				752	{
				753	return (__m128i)((__v8hi)__a * (__v8hi)__b);
				754	}
				755
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	756	/// \brief Multiplies 32-bit unsigned integer values contained in the lower bits
				757	/// of the two 64-bit integer vectors and returns the 64-bit unsigned
				758	/// product.
				759	///
				760	/// \headerfile <x86intrin.h>
				761	///
				762	/// This intrinsic corresponds to the \c PMULUDQ instruction.
				763	///
				764	/// \param __a
				765	/// A 64-bit integer containing one of the source operands.
				766	/// \param __b
				767	/// A 64-bit integer containing one of the source operands.
				768	/// \returns A 64-bit integer vector containing the product of both operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	769	static __inline__ __m64 __DEFAULT_FN_ATTRS
				770	_mm_mul_su32(__m64 __a, __m64 __b)
				771	{
				772	return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
				773	}
				774
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	775	/// \brief Multiplies 32-bit unsigned integer values contained in the lower
				776	/// bits of the corresponding elements of two [2 x i64] vectors, and returns
				777	/// the 64-bit products in the corresponding elements of a [2 x i64] vector.
				778	///
				779	/// \headerfile <x86intrin.h>
				780	///
				781	/// This intrinsic corresponds to the \c VPMULUDQ / PMULUDQ instruction.
				782	///
				783	/// \param __a
				784	/// A [2 x i64] vector containing one of the source operands.
				785	/// \param __b
				786	/// A [2 x i64] vector containing one of the source operands.
				787	/// \returns A [2 x i64] vector containing the product of both operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	788	static __inline__ __m128i __DEFAULT_FN_ATTRS
				789	_mm_mul_epu32(__m128i __a, __m128i __b)
				790	{
				791	return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
				792	}
				793
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	794	/// \brief Computes the absolute differences of corresponding 8-bit integer
				795	/// values in two 128-bit vectors. Sums the first 8 absolute differences, and
				796	/// separately sums the second 8 absolute differences. Packss these two
				797	/// unsigned 16-bit integer sums into the upper and lower elements of a
				798	/// [2 x i64] vector.
				799	///
				800	/// \headerfile <x86intrin.h>
				801	///
				802	/// This intrinsic corresponds to the \c VPSADBW / PSADBW instruction.
				803	///
				804	/// \param __a
				805	/// A 128-bit integer vector containing one of the source operands.
				806	/// \param __b
				807	/// A 128-bit integer vector containing one of the source operands.
				808	/// \returns A [2 x i64] vector containing the sums of the sets of absolute
				809	/// differences between both operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	810	static __inline__ __m128i __DEFAULT_FN_ATTRS
				811	_mm_sad_epu8(__m128i __a, __m128i __b)
				812	{
				813	return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
				814	}
				815
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	816	/// \brief Subtracts the corresponding 8-bit integer values in the operands.
				817	///
				818	/// \headerfile <x86intrin.h>
				819	///
				820	/// This intrinsic corresponds to the \c VPSUBB / PSUBB instruction.
				821	///
				822	/// \param __a
				823	/// A 128-bit integer vector containing the minuends.
				824	/// \param __b
				825	/// A 128-bit integer vector containing the subtrahends.
				826	/// \returns A 128-bit integer vector containing the differences of the values
				827	/// in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	828	static __inline__ __m128i __DEFAULT_FN_ATTRS
				829	_mm_sub_epi8(__m128i __a, __m128i __b)
				830	{
				831	return (__m128i)((__v16qi)__a - (__v16qi)__b);
				832	}
				833
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	834	/// \brief Subtracts the corresponding 16-bit integer values in the operands.
				835	///
				836	/// \headerfile <x86intrin.h>
				837	///
				838	/// This intrinsic corresponds to the \c VPSUBW / PSUBW instruction.
				839	///
				840	/// \param __a
				841	/// A 128-bit integer vector containing the minuends.
				842	/// \param __b
				843	/// A 128-bit integer vector containing the subtrahends.
				844	/// \returns A 128-bit integer vector containing the differences of the values
				845	/// in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	846	static __inline__ __m128i __DEFAULT_FN_ATTRS
				847	_mm_sub_epi16(__m128i __a, __m128i __b)
				848	{
				849	return (__m128i)((__v8hi)__a - (__v8hi)__b);
				850	}
				851
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	852	/// \brief Subtracts the corresponding 32-bit integer values in the operands.
				853	///
				854	/// \headerfile <x86intrin.h>
				855	///
				856	/// This intrinsic corresponds to the \c VPSUBD / PSUBD instruction.
				857	///
				858	/// \param __a
				859	/// A 128-bit integer vector containing the minuends.
				860	/// \param __b
				861	/// A 128-bit integer vector containing the subtrahends.
				862	/// \returns A 128-bit integer vector containing the differences of the values
				863	/// in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	864	static __inline__ __m128i __DEFAULT_FN_ATTRS
				865	_mm_sub_epi32(__m128i __a, __m128i __b)
				866	{
				867	return (__m128i)((__v4si)__a - (__v4si)__b);
				868	}
				869
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	870	/// \brief Subtracts signed or unsigned 64-bit integer values and writes the
				871	/// difference to the corresponding bits in the destination.
				872	///
				873	/// \headerfile <x86intrin.h>
				874	///
				875	/// This intrinsic corresponds to the \c PSUBQ instruction.
				876	///
				877	/// \param __a
				878	/// A 64-bit integer vector containing the minuend.
				879	/// \param __b
				880	/// A 64-bit integer vector containing the subtrahend.
				881	/// \returns A 64-bit integer vector containing the difference of the values in
				882	/// the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	883	static __inline__ __m64 __DEFAULT_FN_ATTRS
				884	_mm_sub_si64(__m64 __a, __m64 __b)
				885	{
				886	return (__m64)__builtin_ia32_psubq(__a, __b);
				887	}
				888
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	889	/// \brief Subtracts the corresponding elements of two [2 x i64] vectors.
				890	///
				891	/// \headerfile <x86intrin.h>
				892	///
				893	/// This intrinsic corresponds to the \c VPSUBQ / PSUBQ instruction.
				894	///
				895	/// \param __a
				896	/// A 128-bit integer vector containing the minuends.
				897	/// \param __b
				898	/// A 128-bit integer vector containing the subtrahends.
				899	/// \returns A 128-bit integer vector containing the differences of the values
				900	/// in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	901	static __inline__ __m128i __DEFAULT_FN_ATTRS
				902	_mm_sub_epi64(__m128i __a, __m128i __b)
				903	{
				904	return __a - __b;
				905	}
				906
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	907	/// \brief Subtracts corresponding 8-bit signed integer values in the input and
				908	/// returns the differences in the corresponding bytes in the destination.
				909	/// Differences greater than 7Fh (127) are saturated to 7Fh, and differences
				910	/// less than 80h (-128) are saturated to 80h.
				911	///
				912	/// \headerfile <x86intrin.h>
				913	///
				914	/// This intrinsic corresponds to the \c VPSUBSB / \c PSUBSB instruction.
				915	///
				916	/// \param __a
				917	/// A 128-bit integer vector containing the minuends.
				918	/// \param __b
				919	/// A 128-bit integer vector containing the subtrahends.
				920	/// \returns A 128-bit integer vector containing the saturated differences of
				921	/// the values in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	922	static __inline__ __m128i __DEFAULT_FN_ATTRS
				923	_mm_subs_epi8(__m128i __a, __m128i __b)
				924	{
				925	return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
				926	}
				927
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	928	/// \brief Subtracts corresponding 16-bit signed integer values in the input and
				929	/// returns the differences in the corresponding 16-bit elements of the
				930	/// destination. Differences greater than 7FFFh (32767) are saturated to 7FFFh,
				931	/// and differences less than 8000h (-32768) are saturated to 8000h.
				932	///
				933	/// \headerfile <x86intrin.h>
				934	///
				935	/// This intrinsic corresponds to the \c VPSUBSW / \c PSUBSW instruction.
				936	///
				937	/// \param __a
				938	/// A 128-bit integer vector containing the minuends.
				939	/// \param __b
				940	/// A 128-bit integer vector containing the subtrahends.
				941	/// \returns A 128-bit integer vector containing the saturated differences of
				942	/// the values in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	943	static __inline__ __m128i __DEFAULT_FN_ATTRS
				944	_mm_subs_epi16(__m128i __a, __m128i __b)
				945	{
				946	return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
				947	}
				948
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	949	/// \brief Subtracts corresponding 8-bit unsigned integer values in the input
				950	/// and returns the differences in the corresponding bytes in the
				951	/// destination. Differences less than 00h are saturated to 00h.
				952	///
				953	/// \headerfile <x86intrin.h>
				954	///
				955	/// This intrinsic corresponds to the \c VPSUBUSB / \c PSUBUSB instruction.
				956	///
				957	/// \param __a
				958	/// A 128-bit integer vector containing the minuends.
				959	/// \param __b
				960	/// A 128-bit integer vector containing the subtrahends.
				961	/// \returns A 128-bit integer vector containing the unsigned, saturated
				962	/// differences of the values in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	963	static __inline__ __m128i __DEFAULT_FN_ATTRS
				964	_mm_subs_epu8(__m128i __a, __m128i __b)
				965	{
				966	return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
				967	}
				968
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	969	/// \brief Subtracts corresponding 16-bit unsigned integer values in the input
				970	/// and returns the differences in the corresponding 16-bit elements of the
				971	/// destination. Differences less than 0000h are saturated to 0000h.
				972	///
				973	/// \headerfile <x86intrin.h>
				974	///
				975	/// This intrinsic corresponds to the \c VPSUBUSW / \c PSUBUSW instruction.
				976	///
				977	/// \param __a
				978	/// A 128-bit integer vector containing the minuends.
				979	/// \param __b
				980	/// A 128-bit integer vector containing the subtrahends.
				981	/// \returns A 128-bit integer vector containing the unsigned, saturated
				982	/// differences of the values in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	983	static __inline__ __m128i __DEFAULT_FN_ATTRS
				984	_mm_subs_epu16(__m128i __a, __m128i __b)
				985	{
				986	return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
				987	}
				988
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	989	/// \brief Performs a bitwise AND of two 128-bit integer vectors.
				990	///
				991	/// \headerfile <x86intrin.h>
				992	///
				993	/// This intrinsic corresponds to the \c VPAND / \c PAND instruction.
				994	///
				995	/// \param __a
				996	/// A 128-bit integer vector containing one of the source operands.
				997	/// \param __b
				998	/// A 128-bit integer vector containing one of the source operands.
				999	/// \returns A 128-bit integer vector containing the bitwise AND of the values
				1000	/// in both operands (\a __a & \a __b).
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1001	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1002	_mm_and_si128(__m128i __a, __m128i __b)
				1003	{
				1004	return __a & __b;
				1005	}
				1006
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1007	/// \brief Performs a bitwise AND of two 128-bit integer vectors, using the
				1008	/// one's complement of the values contained in the first source operand.
				1009	///
				1010	/// \headerfile <x86intrin.h>
				1011	///
				1012	/// This intrinsic corresponds to the \c VPANDN / \c PANDN instruction.
				1013	///
				1014	/// \param __a
				1015	/// A 128-bit vector containing the left source operand. The one's complement
				1016	/// of this value is used in the bitwise AND.
				1017	/// \param __b
				1018	/// A 128-bit vector containing the right source operand.
				1019	/// \returns A 128-bit integer vector containing the bitwise AND of the one's
				1020	/// complement of the first operand and the second operand (~\a __a & \a __b).
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1021	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1022	_mm_andnot_si128(__m128i __a, __m128i __b)
				1023	{
				1024	return ~__a & __b;
				1025	}
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1026	/// \brief Performs a bitwise OR of two 128-bit integer vectors.
				1027	///
				1028	/// \headerfile <x86intrin.h>
				1029	///
				1030	/// This intrinsic corresponds to the \c VPOR / \c POR instruction.
				1031	///
				1032	/// \param __a
				1033	/// A 128-bit integer vector containing one of the source operands.
				1034	/// \param __b
				1035	/// A 128-bit integer vector containing one of the source operands.
				1036	/// \returns A 128-bit integer vector containing the bitwise OR of the values
				1037	/// in both operands (\a __a | \a __b).
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1038	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1039	_mm_or_si128(__m128i __a, __m128i __b)
				1040	{
				1041	return __a | __b;
				1042	}
				1043
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1044	/// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors.
				1045	///
				1046	/// \headerfile <x86intrin.h>
				1047	///
				1048	/// This intrinsic corresponds to the \c VPXOR / \c PXOR instruction.
				1049	///
				1050	/// \param __a
				1051	/// A 128-bit integer vector containing one of the source operands.
				1052	/// \param __b
				1053	/// A 128-bit integer vector containing one of the source operands.
				1054	/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
				1055	/// values in both operands (\a __a ^ \a __b).
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1056	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1057	_mm_xor_si128(__m128i __a, __m128i __b)
				1058	{
				1059	return __a ^ __b;
				1060	}
				1061
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1062	/// \brief Left-shifts the 128-bit integer vector operand by the specified
				1063	/// number of bytes. Low-order bytes are filled with zeros.
				1064	///
				1065	/// \headerfile <x86intrin.h>
				1066	///
				1067	/// \code
				1068	/// __m128i _mm_slli_si128(__m128i a, const int imm);
				1069	/// \endcode
				1070	///
				1071	/// This intrinsic corresponds to the \c VPSLLDQ / \c PSLLDQ instruction.
				1072	///
				1073	/// \param a
				1074	/// A 128-bit integer vector containing the source operand.
				1075	/// \param imm
				1076	/// An immediate value specifying the number of bytes to left-shift
				1077	/// operand \a a. Only bits [7:0] are used; counts above 15 yield zero.
				1078	/// \returns A 128-bit integer vector containing the left-shifted value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1079	#define _mm_slli_si128(a, imm) __extension__ ({ \
				1080	(__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(), \
				1081	(__v16qi)(__m128i)(a), \
				1082	((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
				1083	((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
				1084	((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
				1085	((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
				1086	((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
				1087	((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
				1088	((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
				1089	((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
				1090	((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
				1091	((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
				1092	((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
				1093	((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
				1094	((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
				1095	((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
				1096	((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
				1097	((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); })
				1098
				1099	#define _mm_bslli_si128(a, imm) \
				1100	_mm_slli_si128((a), (imm)) /* alias: forwards unchanged to _mm_slli_si128 */
				1101
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1102	/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
				1103	/// by the specified number of bits. Low-order bits are cleared.
				1104	///
				1105	/// \headerfile <x86intrin.h>
				1106	///
				1107	/// This intrinsic corresponds to the \c VPSLLW / \c PSLLW instruction.
				1108	///
				1109	/// \param __a
				1110	/// A 128-bit integer vector containing the source operand.
				1111	/// \param __count
				1112	/// An integer value specifying the number of bits to left-shift each value
				1113	/// in operand \a __a.
				1114	/// \returns A 128-bit integer vector containing the left-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1115	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1116	_mm_slli_epi16(__m128i __a, int __count)
				1117	{
				1118	return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
				1119	}
				1120
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1121	/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
				1122	/// by the specified number of bits. Low-order bits are cleared.
				1123	///
				1124	/// \headerfile <x86intrin.h>
				1125	///
				1126	/// This intrinsic corresponds to the \c VPSLLW / \c PSLLW instruction.
				1127	///
				1128	/// \param __a
				1129	/// A 128-bit integer vector containing the source operand.
				1130	/// \param __count
				1131	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1132	/// to left-shift each value in operand \a __a.
				1133	/// \returns A 128-bit integer vector containing the left-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1134	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1135	_mm_sll_epi16(__m128i __a, __m128i __count)
				1136	{
				1137	return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
				1138	}
				1139
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1140	/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
				1141	/// by the specified number of bits. Low-order bits are cleared.
				1142	///
				1143	/// \headerfile <x86intrin.h>
				1144	///
				1145	/// This intrinsic corresponds to the \c VPSLLD / \c PSLLD instruction.
				1146	///
				1147	/// \param __a
				1148	/// A 128-bit integer vector containing the source operand.
				1149	/// \param __count
				1150	/// An integer value specifying the number of bits to left-shift each value
				1151	/// in operand \a __a.
				1152	/// \returns A 128-bit integer vector containing the left-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1153	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1154	_mm_slli_epi32(__m128i __a, int __count)
				1155	{
				1156	return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
				1157	}
				1158
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1159	/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
				1160	/// by the specified number of bits. Low-order bits are cleared.
				1161	///
				1162	/// \headerfile <x86intrin.h>
				1163	///
				1164	/// This intrinsic corresponds to the \c VPSLLD / \c PSLLD instruction.
				1165	///
				1166	/// \param __a
				1167	/// A 128-bit integer vector containing the source operand.
				1168	/// \param __count
				1169	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1170	/// to left-shift each value in operand \a __a.
				1171	/// \returns A 128-bit integer vector containing the left-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1172	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1173	_mm_sll_epi32(__m128i __a, __m128i __count)
				1174	{
				1175	return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
				1176	}
				1177
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1178	/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
				1179	/// by the specified number of bits. Low-order bits are cleared.
				1180	///
				1181	/// \headerfile <x86intrin.h>
				1182	///
				1183	/// This intrinsic corresponds to the \c VPSLLQ / \c PSLLQ instruction.
				1184	///
				1185	/// \param __a
				1186	/// A 128-bit integer vector containing the source operand.
				1187	/// \param __count
				1188	/// An integer value specifying the number of bits to left-shift each value
				1189	/// in operand \a __a.
				1190	/// \returns A 128-bit integer vector containing the left-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1191	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1192	_mm_slli_epi64(__m128i __a, int __count)
				1193	{
				1194	return __builtin_ia32_psllqi128(__a, __count);
				1195	}
				1196
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1197	/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
				1198	/// by the specified number of bits. Low-order bits are cleared.
				1199	///
				1200	/// \headerfile <x86intrin.h>
				1201	///
				1202	/// This intrinsic corresponds to the \c VPSLLQ / \c PSLLQ instruction.
				1203	///
				1204	/// \param __a
				1205	/// A 128-bit integer vector containing the source operand.
				1206	/// \param __count
				1207	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1208	/// to left-shift each value in operand \a __a.
				1209	/// \returns A 128-bit integer vector containing the left-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1210	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1211	_mm_sll_epi64(__m128i __a, __m128i __count)
				1212	{
				1213	return __builtin_ia32_psllq128(__a, __count);
				1214	}
				1215
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1216	/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
				1217	/// by the specified number of bits. High-order bits are filled with the sign
				1218	/// bit of the initial value.
				1219	///
				1220	/// \headerfile <x86intrin.h>
				1221	///
				1222	/// This intrinsic corresponds to the \c VPSRAW / \c PSRAW instruction.
				1223	///
				1224	/// \param __a
				1225	/// A 128-bit integer vector containing the source operand.
				1226	/// \param __count
				1227	/// An integer value specifying the number of bits to right-shift each value
				1228	/// in operand \a __a.
				1229	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1230	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1231	_mm_srai_epi16(__m128i __a, int __count)
				1232	{
				1233	return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
				1234	}
				1235
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1236	/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
				1237	/// by the specified number of bits. High-order bits are filled with the sign
				1238	/// bit of the initial value.
				1239	///
				1240	/// \headerfile <x86intrin.h>
				1241	///
				1242	/// This intrinsic corresponds to the \c VPSRAW / \c PSRAW instruction.
				1243	///
				1244	/// \param __a
				1245	/// A 128-bit integer vector containing the source operand.
				1246	/// \param __count
				1247	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1248	/// to right-shift each value in operand \a __a.
				1249	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1250	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1251	_mm_sra_epi16(__m128i __a, __m128i __count)
				1252	{
				1253	return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
				1254	}
				1255
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1256	/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
				1257	/// by the specified number of bits. High-order bits are filled with the sign
				1258	/// bit of the initial value.
				1259	///
				1260	/// \headerfile <x86intrin.h>
				1261	///
				1262	/// This intrinsic corresponds to the \c VPSRAD / \c PSRAD instruction.
				1263	///
				1264	/// \param __a
				1265	/// A 128-bit integer vector containing the source operand.
				1266	/// \param __count
				1267	/// An integer value specifying the number of bits to right-shift each value
				1268	/// in operand \a __a.
				1269	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1270	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1271	_mm_srai_epi32(__m128i __a, int __count)
				1272	{
				1273	return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
				1274	}
				1275
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1276	/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
				1277	/// by the specified number of bits. High-order bits are filled with the sign
				1278	/// bit of the initial value.
				1279	///
				1280	/// \headerfile <x86intrin.h>
				1281	///
				1282	/// This intrinsic corresponds to the \c VPSRAD / \c PSRAD instruction.
				1283	///
				1284	/// \param __a
				1285	/// A 128-bit integer vector containing the source operand.
				1286	/// \param __count
				1287	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1288	/// to right-shift each value in operand \a __a.
				1289	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1290	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1291	_mm_sra_epi32(__m128i __a, __m128i __count)
				1292	{
				1293	return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
				1294	}
				1295
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1296	/// \brief Right-shifts the 128-bit integer vector operand by the specified
				1297	/// number of bytes. High-order bytes are filled with zeros.
				1298	///
				1299	/// \headerfile <x86intrin.h>
				1300	///
				1301	/// \code
				1302	/// __m128i _mm_srli_si128(__m128i a, const int imm);
				1303	/// \endcode
				1304	///
				1305	/// This intrinsic corresponds to the \c VPSRLDQ / \c PSRLDQ instruction.
				1306	///
				1307	/// \param a
				1308	/// A 128-bit integer vector containing the source operand.
				1309	/// \param imm
				1310	/// An immediate value specifying the number of bytes to right-shift operand
				1311	/// \a a. Only bits [7:0] are used; counts above 15 yield zero.
				1312	/// \returns A 128-bit integer vector containing the right-shifted value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1313	#define _mm_srli_si128(a, imm) __extension__ ({ \
				1314	(__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a), \
				1315	(__v16qi)_mm_setzero_si128(), \
				1316	((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \
				1317	((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \
				1318	((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \
				1319	((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \
				1320	((imm)&0xF0) ? 16 : ((imm)&0xF) + 4, \
				1321	((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \
				1322	((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \
				1323	((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \
				1324	((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \
				1325	((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \
				1326	((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
				1327	((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
				1328	((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
				1329	((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
				1330	((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
				1331	((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); })
				1332
				1333	#define _mm_bsrli_si128(a, imm) \
				1334	_mm_srli_si128((a), (imm)) /* alias: forwards unchanged to _mm_srli_si128 */
				1335
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1336	/// \brief Right-shifts each of the 16-bit values in the 128-bit integer vector
				1337	/// operand by the specified number of bits. High-order bits are cleared.
				1338	///
				1339	/// \headerfile <x86intrin.h>
				1340	///
				1341	/// This intrinsic corresponds to the \c VPSRLW / \c PSRLW instruction.
				1342	///
				1343	/// \param __a
				1344	/// A 128-bit integer vector containing the source operand.
				1345	/// \param __count
				1346	/// An integer value specifying the number of bits to right-shift each value
				1347	/// in operand \a __a.
				1348	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1349	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1350	_mm_srli_epi16(__m128i __a, int __count)
				1351	{
				1352	return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
				1353	}
				1354
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1355	/// \brief Right-shifts each of the 16-bit values in the 128-bit integer vector
				1356	/// operand by the specified number of bits. High-order bits are cleared.
				1357	///
				1358	/// \headerfile <x86intrin.h>
				1359	///
				1360	/// This intrinsic corresponds to the \c VPSRLW / \c PSRLW instruction.
				1361	///
				1362	/// \param __a
				1363	/// A 128-bit integer vector containing the source operand.
				1364	/// \param __count
				1365	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1366	/// to right-shift each value in operand \a __a.
				1367	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1368	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1369	_mm_srl_epi16(__m128i __a, __m128i __count)
				1370	{
				1371	return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
				1372	}
				1373
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1374	/// \brief Right-shifts each of the 32-bit values in the 128-bit integer vector
				1375	/// operand by the specified number of bits. High-order bits are cleared.
				1376	///
				1377	/// \headerfile <x86intrin.h>
				1378	///
				1379	/// This intrinsic corresponds to the \c VPSRLD / \c PSRLD instruction.
				1380	///
				1381	/// \param __a
				1382	/// A 128-bit integer vector containing the source operand.
				1383	/// \param __count
				1384	/// An integer value specifying the number of bits to right-shift each value
				1385	/// in operand \a __a.
				1386	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1387	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1388	_mm_srli_epi32(__m128i __a, int __count)
				1389	{
				1390	return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
				1391	}
				1392
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1393	/// \brief Right-shifts each of the 32-bit values in the 128-bit integer vector
				1394	/// operand by the specified number of bits. High-order bits are cleared.
				1395	///
				1396	/// \headerfile <x86intrin.h>
				1397	///
				1398	/// This intrinsic corresponds to the \c VPSRLD / \c PSRLD instruction.
				1399	///
				1400	/// \param __a
				1401	/// A 128-bit integer vector containing the source operand.
				1402	/// \param __count
				1403	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1404	/// to right-shift each value in operand \a __a.
				1405	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1406	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1407	_mm_srl_epi32(__m128i __a, __m128i __count)
				1408	{
				1409	return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
				1410	}
				1411
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1412	/// \brief Right-shifts each of the 64-bit values in the 128-bit integer vector
				1413	/// operand by the specified number of bits. High-order bits are cleared.
				1414	///
				1415	/// \headerfile <x86intrin.h>
				1416	///
				1417	/// This intrinsic corresponds to the \c VPSRLQ / \c PSRLQ instruction.
				1418	///
				1419	/// \param __a
				1420	/// A 128-bit integer vector containing the source operand.
				1421	/// \param __count
				1422	/// An integer value specifying the number of bits to right-shift each value
				1423	/// in operand \a __a.
				1424	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1425	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1426	_mm_srli_epi64(__m128i __a, int __count)
				1427	{
				1428	return __builtin_ia32_psrlqi128(__a, __count);
				1429	}
				1430
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1431	/// \brief Right-shifts each of the 64-bit values in the 128-bit integer vector
				1432	/// operand by the specified number of bits. High-order bits are cleared.
				1433	///
				1434	/// \headerfile <x86intrin.h>
				1435	///
				1436	/// This intrinsic corresponds to the \c VPSRLQ / \c PSRLQ instruction.
				1437	///
				1438	/// \param __a
				1439	/// A 128-bit integer vector containing the source operand.
				1440	/// \param __count
				1441	/// A 128-bit integer vector in which bits [63:0] specify the number of bits
				1442	/// to right-shift each value in operand \a __a.
				1443	/// \returns A 128-bit integer vector containing the right-shifted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1444	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1445	_mm_srl_epi64(__m128i __a, __m128i __count)
				1446	{
				1447	return __builtin_ia32_psrlq128(__a, __count);
				1448	}
				1449
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1450	/// \brief Compares each of the corresponding 8-bit values of the 128-bit
				1451	/// integer vectors for equality. Each comparison yields 0h for false, FFh
				1452	/// for true.
				1453	///
				1454	/// \headerfile <x86intrin.h>
				1455	///
				1456	/// This intrinsic corresponds to the \c VPCMPEQB / \c PCMPEQB instruction.
				1457	///
				1458	/// \param __a
				1459	/// A 128-bit integer vector.
				1460	/// \param __b
				1461	/// A 128-bit integer vector.
				1462	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1463	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1464	_mm_cmpeq_epi8(__m128i __a, __m128i __b)
				1465	{
				1466	return (__m128i)((__v16qi)__a == (__v16qi)__b);
				1467	}
				1468
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1469	/// \brief Compares each of the corresponding 16-bit values of the 128-bit
				1470	/// integer vectors for equality. Each comparison yields 0h for false, FFFFh
				1471	/// for true.
				1472	///
				1473	/// \headerfile <x86intrin.h>
				1474	///
				1475	/// This intrinsic corresponds to the \c VPCMPEQW / \c PCMPEQW instruction.
				1476	///
				1477	/// \param __a
				1478	/// A 128-bit integer vector.
				1479	/// \param __b
				1480	/// A 128-bit integer vector.
				1481	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1482	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1483	_mm_cmpeq_epi16(__m128i __a, __m128i __b)
				1484	{
				1485	return (__m128i)((__v8hi)__a == (__v8hi)__b);
				1486	}
				1487
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1488	/// \brief Compares each of the corresponding 32-bit values of the 128-bit
				1489	/// integer vectors for equality. Each comparison yields 0h for false,
				1490	/// FFFFFFFFh for true.
				1491	///
				1492	/// \headerfile <x86intrin.h>
				1493	///
				1494	/// This intrinsic corresponds to the \c VPCMPEQD / \c PCMPEQD instruction.
				1495	///
				1496	/// \param __a
				1497	/// A 128-bit integer vector.
				1498	/// \param __b
				1499	/// A 128-bit integer vector.
				1500	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1501	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1502	_mm_cmpeq_epi32(__m128i __a, __m128i __b)
				1503	{
				1504	return (__m128i)((__v4si)__a == (__v4si)__b);
				1505	}
				1506
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1507	/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
				1508	/// integer vectors to determine if the values in the first operand are
				1509	/// greater than those in the second operand. Each comparison yields 0h for
				1510	/// false, FFh for true.
				1511	///
				1512	/// \headerfile <x86intrin.h>
				1513	///
				1514	/// This intrinsic corresponds to the \c VPCMPGTB / \c PCMPGTB instruction.
				1515	///
				1516	/// \param __a
				1517	/// A 128-bit integer vector.
				1518	/// \param __b
				1519	/// A 128-bit integer vector.
				1520	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1521	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1522	_mm_cmpgt_epi8(__m128i __a, __m128i __b)
				1523	{
				1524	/* This function always performs a signed comparison, but __v16qi is a char
				1525	vector and char may be signed or unsigned, so use __v16qs instead. */
				1526	return (__m128i)((__v16qs)__a > (__v16qs)__b);
				1527	}
				1528
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1529	/// \brief Compares each of the corresponding signed 16-bit values of the
				1530	/// 128-bit integer vectors to determine if the values in the first operand
				1531	/// are greater than those in the second operand. Each comparison yields 0h
				1532	/// for false, FFFFh for true.
				1533	///
				1534	/// \headerfile <x86intrin.h>
				1535	///
				1536	/// This intrinsic corresponds to the \c VPCMPGTW / \c PCMPGTW instruction.
				1537	///
				1538	/// \param __a
				1539	/// A 128-bit integer vector.
				1540	/// \param __b
				1541	/// A 128-bit integer vector.
				1542	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1543	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1544	_mm_cmpgt_epi16(__m128i __a, __m128i __b)
				1545	{
				1546	return (__m128i)((__v8hi)__a > (__v8hi)__b);
				1547	}
				1548
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1549	/// \brief Compares each of the corresponding signed 32-bit values of the
				1550	/// 128-bit integer vectors to determine if the values in the first operand
				1551	/// are greater than those in the second operand. Each comparison yields 0h
				1552	/// for false, FFFFFFFFh for true.
				1553	///
				1554	/// \headerfile <x86intrin.h>
				1555	///
				1556	/// This intrinsic corresponds to the \c VPCMPGTD / \c PCMPGTD instruction.
				1557	///
				1558	/// \param __a
				1559	/// A 128-bit integer vector.
				1560	/// \param __b
				1561	/// A 128-bit integer vector.
				1562	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1563	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1564	_mm_cmpgt_epi32(__m128i __a, __m128i __b)
				1565	{
				1566	return (__m128i)((__v4si)__a > (__v4si)__b);
				1567	}
				1568
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1569	/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
				1570	/// integer vectors to determine if the values in the first operand are less
				1571	/// than those in the second operand. Each comparison yields 0h for false,
				1572	/// FFh for true.
				1573	///
				1574	/// \headerfile <x86intrin.h>
				1575	///
				1576	/// This intrinsic corresponds to the \c VPCMPGTB / PCMPGTB instruction.
				1577	///
				1578	/// \param __a
				1579	/// A 128-bit integer vector.
				1580	/// \param __b
				1581	/// A 128-bit integer vector.
				1582	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1583	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1584	_mm_cmplt_epi8(__m128i __a, __m128i __b)
				1585	{
				1586	return _mm_cmpgt_epi8(__b, __a);
				1587	}
				1588
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1589	/// \brief Compares each of the corresponding signed 16-bit values of the
				1590	/// 128-bit integer vectors to determine if the values in the first operand
				1591	/// are less than those in the second operand. Each comparison yields 0h for
				1592	/// false, FFFFh for true.
				1593	///
				1594	/// \headerfile <x86intrin.h>
				1595	///
				1596	/// This intrinsic corresponds to the \c VPCMPGTW / PCMPGTW instruction.
				1597	///
				1598	/// \param __a
				1599	/// A 128-bit integer vector.
				1600	/// \param __b
				1601	/// A 128-bit integer vector.
				1602	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1603	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1604	_mm_cmplt_epi16(__m128i __a, __m128i __b)
				1605	{
				1606	return _mm_cmpgt_epi16(__b, __a);
				1607	}
				1608
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1609	/// \brief Compares each of the corresponding signed 32-bit values of the
				1610	/// 128-bit integer vectors to determine if the values in the first operand
				1611	/// are less than those in the second operand. Each comparison yields 0h for
				1612	/// false, FFFFFFFFh for true.
				1613	///
				1614	/// \headerfile <x86intrin.h>
				1615	///
				1616	/// This intrinsic corresponds to the \c VPCMPGTD / PCMPGTD instruction.
				1617	///
				1618	/// \param __a
				1619	/// A 128-bit integer vector.
				1620	/// \param __b
				1621	/// A 128-bit integer vector.
				1622	/// \returns A 128-bit integer vector containing the comparison results.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1623	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1624	_mm_cmplt_epi32(__m128i __a, __m128i __b)
				1625	{
				1626	return _mm_cmpgt_epi32(__b, __a);
				1627	}
				1628
				1629	#ifdef __x86_64__
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1630	/// \brief Converts a 64-bit signed integer value from the second operand into a
				1631	/// double-precision value and returns it in the lower element of a [2 x
				1632	/// double] vector; the upper element of the returned vector is copied from
				1633	/// the upper element of the first operand.
				1634	///
				1635	/// \headerfile <x86intrin.h>
				1636	///
				1637	/// This intrinsic corresponds to the \c VCVTSI2SD / CVTSI2SD instruction.
				1638	///
				1639	/// \param __a
				1640	/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
				1641	/// copied to the upper 64 bits of the destination.
				1642	/// \param __b
				1643	/// A 64-bit signed integer operand containing the value to be converted.
				1644	/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
				1645	/// converted value of the second operand. The upper 64 bits are copied from
				1646	/// the upper 64 bits of the first operand.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1647	static __inline__ __m128d __DEFAULT_FN_ATTRS
				1648	_mm_cvtsi64_sd(__m128d __a, long long __b)
				1649	{
				1650	__a[0] = __b;
				1651	return __a;
				1652	}
				1653
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1654	/// \brief Converts the first (lower) element of a vector of [2 x double] into a
				1655	/// 64-bit signed integer value, according to the current rounding mode.
				1656	///
				1657	/// \headerfile <x86intrin.h>
				1658	///
				1659	/// This intrinsic corresponds to the \c VCVTSD2SI / CVTSD2SI instruction.
				1660	///
				1661	/// \param __a
				1662	/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
				1663	/// conversion.
				1664	/// \returns A 64-bit signed integer containing the converted value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1665	static __inline__ long long __DEFAULT_FN_ATTRS
				1666	_mm_cvtsd_si64(__m128d __a)
				1667	{
				1668	return __builtin_ia32_cvtsd2si64(__a);
				1669	}
				1670
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1671	/// \brief Converts the first (lower) element of a vector of [2 x double] into a
				1672	/// 64-bit signed integer value, truncating the result when it is inexact.
				1673	///
				1674	/// \headerfile <x86intrin.h>
				1675	///
				1676	/// This intrinsic corresponds to the \c VCVTTSD2SI / CVTTSD2SI instruction.
				1677	///
				1678	/// \param __a
				1679	/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
				1680	/// conversion.
				1681	/// \returns A 64-bit signed integer containing the converted value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1682	static __inline__ long long __DEFAULT_FN_ATTRS
				1683	_mm_cvttsd_si64(__m128d __a)
				1684	{
				1685	return __a[0];
				1686	}
				1687	#endif
				1688
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1689	/// \brief Converts a vector of [4 x i32] into a vector of [4 x float].
				1690	///
				1691	/// \headerfile <x86intrin.h>
				1692	///
				1693	/// This intrinsic corresponds to the \c VCVTDQ2PS / CVTDQ2PS instruction.
				1694	///
				1695	/// \param __a
				1696	/// A 128-bit integer vector.
				1697	/// \returns A 128-bit vector of [4 x float] containing the converted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1698	static __inline__ __m128 __DEFAULT_FN_ATTRS
				1699	_mm_cvtepi32_ps(__m128i __a)
				1700	{
				1701	return __builtin_ia32_cvtdq2ps((__v4si)__a);
				1702	}
				1703
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1704	/// \brief Converts a vector of [4 x float] into a vector of [4 x i32].
				1705	///
				1706	/// \headerfile <x86intrin.h>
				1707	///
				1708	/// This intrinsic corresponds to the \c VCVTPS2DQ / CVTPS2DQ instruction.
				1709	///
				1710	/// \param __a
				1711	/// A 128-bit vector of [4 x float].
				1712	/// \returns A 128-bit integer vector of [4 x i32] containing the converted
				1713	/// values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1714	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1715	_mm_cvtps_epi32(__m128 __a)
				1716	{
				1717	return (__m128i)__builtin_ia32_cvtps2dq(__a);
				1718	}
				1719
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1720	/// \brief Converts a vector of [4 x float] into a vector of [4 x i32],
				1721	/// truncating the result when it is inexact.
				1722	///
				1723	/// \headerfile <x86intrin.h>
				1724	///
				1725	/// This intrinsic corresponds to the \c VCVTTPS2DQ / CVTTPS2DQ instruction.
				1726	///
				1727	/// \param __a
				1728	/// A 128-bit vector of [4 x float].
				1729	/// \returns A 128-bit vector of [4 x i32] containing the converted values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1730	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1731	_mm_cvttps_epi32(__m128 __a)
				1732	{
				1733	return (__m128i)__builtin_ia32_cvttps2dq(__a);
				1734	}
				1735
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1736	/// \brief Returns a vector of [4 x i32] where the lowest element is the input
				1737	/// operand and the remaining elements are zero.
				1738	///
				1739	/// \headerfile <x86intrin.h>
				1740	///
				1741	/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
				1742	///
				1743	/// \param __a
				1744	/// A 32-bit signed integer operand.
				1745	/// \returns A 128-bit vector of [4 x i32].
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1746	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1747	_mm_cvtsi32_si128(int __a)
				1748	{
				1749	return (__m128i)(__v4si){ __a, 0, 0, 0 };
				1750	}
				1751
				1752	#ifdef __x86_64__
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1753	/// \brief Returns a vector of [2 x i64] where the lower element is the input
				1754	/// operand and the upper element is zero.
				1755	///
				1756	/// \headerfile <x86intrin.h>
				1757	///
				1758	/// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
				1759	///
				1760	/// \param __a
				1761	/// A 64-bit signed integer operand containing the value to be converted.
				1762	/// \returns A 128-bit vector of [2 x i64] containing the converted value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1763	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1764	_mm_cvtsi64_si128(long long __a)
				1765	{
				1766	return (__m128i){ __a, 0 };
				1767	}
				1768	#endif
				1769
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1770	/// \brief Moves the least significant 32 bits of a vector of [4 x i32] to a
				1771	/// 32-bit signed integer value.
				1772	///
				1773	/// \headerfile <x86intrin.h>
				1774	///
				1775	/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
				1776	///
				1777	/// \param __a
				1778	/// A vector of [4 x i32]. The least significant 32 bits are moved to the
				1779	/// destination.
				1780	/// \returns A 32-bit signed integer containing the moved value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1781	static __inline__ int __DEFAULT_FN_ATTRS
				1782	_mm_cvtsi128_si32(__m128i __a)
				1783	{
				1784	__v4si __b = (__v4si)__a;
				1785	return __b[0];
				1786	}
				1787
				1788	#ifdef __x86_64__
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1789	/// \brief Moves the least significant 64 bits of a vector of [2 x i64] to a
				1790	/// 64-bit signed integer value.
				1791	///
				1792	/// \headerfile <x86intrin.h>
				1793	///
				1794	/// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
				1795	///
				1796	/// \param __a
				1797	/// A vector of [2 x i64]. The least significant 64 bits are moved to the
				1798	/// destination.
				1799	/// \returns A 64-bit signed integer containing the moved value.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1800	static __inline__ long long __DEFAULT_FN_ATTRS
				1801	_mm_cvtsi128_si64(__m128i __a)
				1802	{
				1803	return __a[0];
				1804	}
				1805	#endif
				1806
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1807	/// \brief Moves packed integer values from an aligned 128-bit memory location
				1808	/// to elements in a 128-bit integer vector.
				1809	///
				1810	/// \headerfile <x86intrin.h>
				1811	///
				1812	/// This intrinsic corresponds to the \c VMOVDQA / MOVDQA instruction.
				1813	///
				1814	/// \param __p
				1815	/// An aligned pointer to a memory location containing integer values.
				1816	/// \returns A 128-bit integer vector containing the moved values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1817	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1818	_mm_load_si128(__m128i const *__p)
				1819	{
				1820	return *__p;
				1821	}
				1822
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1823	/// \brief Moves packed integer values from an unaligned 128-bit memory location
				1824	/// to elements in a 128-bit integer vector.
				1825	///
				1826	/// \headerfile <x86intrin.h>
				1827	///
				1828	/// This intrinsic corresponds to the \c VMOVDQU / MOVDQU instruction.
				1829	///
				1830	/// \param __p
				1831	/// A pointer to a memory location containing integer values.
				1832	/// \returns A 128-bit integer vector containing the moved values.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1833	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1834	_mm_loadu_si128(__m128i const *__p)
				1835	{
				1836	struct __loadu_si128 {
				1837	__m128i __v;
				1838	} __attribute__((__packed__, __may_alias__));
				1839	return ((struct __loadu_si128*)__p)->__v;
				1840	}
				1841
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1842	/// \brief Returns a vector of [2 x i64] where the lower element is taken from
				1843	/// the lower element of the operand, and the upper element is zero.
				1844	///
				1845	/// \headerfile <x86intrin.h>
				1846	///
				1847	/// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
				1848	///
				1849	/// \param __p
				1850	/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
				1851	/// the destination.
				1852	/// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
				1853	/// moved value. The higher order bits are cleared.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1854	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1855	_mm_loadl_epi64(__m128i const *__p)
				1856	{
				1857	struct __mm_loadl_epi64_struct {
				1858	long long __u;
				1859	} __attribute__((__packed__, __may_alias__));
				1860	return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
				1861	}
				1862
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1863	/// \brief Generates a 128-bit vector of [4 x i32] with unspecified content.
				1864	/// This could be used as an argument to another intrinsic function where the
				1865	/// argument is required but the value is not actually used.
				1866	///
				1867	/// \headerfile <x86intrin.h>
				1868	///
				1869	/// This intrinsic has no corresponding instruction.
				1870	///
				1871	/// \returns A 128-bit vector of [4 x i32] with unspecified content.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1872	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1873	_mm_undefined_si128()
				1874	{
				1875	return (__m128i)__builtin_ia32_undef128();
				1876	}
				1877
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1878	/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
				1879	/// the specified 64-bit integer values.
				1880	///
				1881	/// \headerfile <x86intrin.h>
				1882	///
				1883	/// This intrinsic is a utility function and does not correspond to a specific
				1884	/// instruction.
				1885	///
				1886	/// \param __q1
				1887	/// A 64-bit integer value used to initialize the upper 64 bits of the
				1888	/// destination vector of [2 x i64].
				1889	/// \param __q0
				1890	/// A 64-bit integer value used to initialize the lower 64 bits of the
				1891	/// destination vector of [2 x i64].
				1892	/// \returns An initialized 128-bit vector of [2 x i64] containing the values
				1893	/// provided in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1894	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1895	_mm_set_epi64x(long long __q1, long long __q0)
				1896	{
				1897	return (__m128i){ __q0, __q1 };
				1898	}
				1899
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1900	/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
				1901	/// the specified 64-bit integer values.
				1902	///
				1903	/// \headerfile <x86intrin.h>
				1904	///
				1905	/// This intrinsic is a utility function and does not correspond to a specific
				1906	/// instruction.
				1907	///
				1908	/// \param __q1
				1909	/// A 64-bit integer value used to initialize the upper 64 bits of the
				1910	/// destination vector of [2 x i64].
				1911	/// \param __q0
				1912	/// A 64-bit integer value used to initialize the lower 64 bits of the
				1913	/// destination vector of [2 x i64].
				1914	/// \returns An initialized 128-bit vector of [2 x i64] containing the values
				1915	/// provided in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1916	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1917	_mm_set_epi64(__m64 __q1, __m64 __q0)
				1918	{
				1919	return (__m128i){ (long long)__q0, (long long)__q1 };
				1920	}
				1921
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1922	/// \brief Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
				1923	/// the specified 32-bit integer values.
				1924	///
				1925	/// \headerfile <x86intrin.h>
				1926	///
				1927	/// This intrinsic is a utility function and does not correspond to a specific
				1928	/// instruction.
				1929	///
				1930	/// \param __i3
				1931	/// A 32-bit integer value used to initialize bits [127:96] of the
				1932	/// destination vector.
				1933	/// \param __i2
				1934	/// A 32-bit integer value used to initialize bits [95:64] of the destination
				1935	/// vector.
				1936	/// \param __i1
				1937	/// A 32-bit integer value used to initialize bits [63:32] of the destination
				1938	/// vector.
				1939	/// \param __i0
				1940	/// A 32-bit integer value used to initialize bits [31:0] of the destination
				1941	/// vector.
				1942	/// \returns An initialized 128-bit vector of [4 x i32] containing the values
				1943	/// provided in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1944	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1945	_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
				1946	{
				1947	return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
				1948	}
				1949
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1950	/// \brief Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
				1951	/// the specified 16-bit integer values.
				1952	///
				1953	/// \headerfile <x86intrin.h>
				1954	///
				1955	/// This intrinsic is a utility function and does not correspond to a specific
				1956	/// instruction.
				1957	///
				1958	/// \param __w7
				1959	/// A 16-bit integer value used to initialize bits [127:112] of the
				1960	/// destination vector.
				1961	/// \param __w6
				1962	/// A 16-bit integer value used to initialize bits [111:96] of the
				1963	/// destination vector.
				1964	/// \param __w5
				1965	/// A 16-bit integer value used to initialize bits [95:80] of the destination
				1966	/// vector.
				1967	/// \param __w4
				1968	/// A 16-bit integer value used to initialize bits [79:64] of the destination
				1969	/// vector.
				1970	/// \param __w3
				1971	/// A 16-bit integer value used to initialize bits [63:48] of the destination
				1972	/// vector.
				1973	/// \param __w2
				1974	/// A 16-bit integer value used to initialize bits [47:32] of the destination
				1975	/// vector.
				1976	/// \param __w1
				1977	/// A 16-bit integer value used to initialize bits [31:16] of the destination
				1978	/// vector.
				1979	/// \param __w0
				1980	/// A 16-bit integer value used to initialize bits [15:0] of the destination
				1981	/// vector.
				1982	/// \returns An initialized 128-bit vector of [8 x i16] containing the values
				1983	/// provided in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1984	static __inline__ __m128i __DEFAULT_FN_ATTRS
				1985	_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
				1986	{
				1987	return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
				1988	}
				1989
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1990	/// \brief Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
				1991	/// the specified 8-bit integer values.
				1992	///
				1993	/// \headerfile <x86intrin.h>
				1994	///
				1995	/// This intrinsic is a utility function and does not correspond to a specific
				1996	/// instruction.
				1997	///
				1998	/// \param __b15
				1999	/// Initializes bits [127:120] of the destination vector.
				2000	/// \param __b14
				2001	/// Initializes bits [119:112] of the destination vector.
				2002	/// \param __b13
				2003	/// Initializes bits [111:104] of the destination vector.
				2004	/// \param __b12
				2005	/// Initializes bits [103:96] of the destination vector.
				2006	/// \param __b11
				2007	/// Initializes bits [95:88] of the destination vector.
				2008	/// \param __b10
				2009	/// Initializes bits [87:80] of the destination vector.
				2010	/// \param __b9
				2011	/// Initializes bits [79:72] of the destination vector.
				2012	/// \param __b8
				2013	/// Initializes bits [71:64] of the destination vector.
				2014	/// \param __b7
				2015	/// Initializes bits [63:56] of the destination vector.
				2016	/// \param __b6
				2017	/// Initializes bits [55:48] of the destination vector.
				2018	/// \param __b5
				2019	/// Initializes bits [47:40] of the destination vector.
				2020	/// \param __b4
				2021	/// Initializes bits [39:32] of the destination vector.
				2022	/// \param __b3
				2023	/// Initializes bits [31:24] of the destination vector.
				2024	/// \param __b2
				2025	/// Initializes bits [23:16] of the destination vector.
				2026	/// \param __b1
				2027	/// Initializes bits [15:8] of the destination vector.
				2028	/// \param __b0
				2029	/// Initializes bits [7:0] of the destination vector.
				2030	/// \returns An initialized 128-bit vector of [16 x i8] containing the values
				2031	/// provided in the operands.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	2032	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2033	_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
				2034	{
				2035	return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
				2036	}
				2037
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	2038	/// \brief Initializes both values in a 128-bit integer vector with the
				2039	/// specified 64-bit integer value.
				2040	///
				2041	/// \headerfile <x86intrin.h>
				2042	///
				2043	/// This intrinsic is a utility function and does not correspond to a specific
				2044	/// instruction.
				2045	///
				2046	/// \param __q
				2047	/// Integer value used to initialize the elements of the destination integer
				2048	/// vector.
				2049	/// \returns An initialized 128-bit integer vector of [2 x i64] with both
				2050	/// elements containing the value provided in the operand.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	2051	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2052	_mm_set1_epi64x(long long __q)
				2053	{
				2054	return (__m128i){ __q, __q };
				2055	}
				2056
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	2057	/// \brief Initializes both values in a 128-bit vector of [2 x i64] with the
				2058	/// specified 64-bit value.
				2059	///
				2060	/// \headerfile <x86intrin.h>
				2061	///
				2062	/// This intrinsic is a utility function and does not correspond to a specific
				2063	/// instruction.
				2064	///
				2065	/// \param __q
				2066	/// A 64-bit value used to initialize the elements of the destination integer
				2067	/// vector.
				2068	/// \returns An initialized 128-bit vector of [2 x i64] with all elements
				2069	/// containing the value provided in the operand.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	2070	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2071	_mm_set1_epi64(__m64 __q)
				2072	{
				2073	return (__m128i){ (long long)__q, (long long)__q };
				2074	}
				2075
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	2076	/// \brief Initializes all values in a 128-bit vector of [4 x i32] with the
				2077	/// specified 32-bit value.
				2078	///
				2079	/// \headerfile <x86intrin.h>
				2080	///
				2081	/// This intrinsic is a utility function and does not correspond to a specific
				2082	/// instruction.
				2083	///
				2084	/// \param __i
				2085	/// A 32-bit value used to initialize the elements of the destination integer
				2086	/// vector.
				2087	/// \returns An initialized 128-bit vector of [4 x i32] with all elements
				2088	/// containing the value provided in the operand.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	2089	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2090	_mm_set1_epi32(int __i)
				2091	{
				2092	return (__m128i)(__v4si){ __i, __i, __i, __i };
				2093	}
				2094
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	2095	/// \brief Initializes all values in a 128-bit vector of [8 x i16] with the
				2096	/// specified 16-bit value.
				2097	///
				2098	/// \headerfile <x86intrin.h>
				2099	///
				2100	/// This intrinsic is a utility function and does not correspond to a specific
				2101	/// instruction.
				2102	///
				2103	/// \param __w
				2104	/// A 16-bit value used to initialize the elements of the destination integer
				2105	/// vector.
				2106	/// \returns An initialized 128-bit vector of [8 x i16] with all elements
				2107	/// containing the value provided in the operand.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	2108	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2109	_mm_set1_epi16(short __w)
				2110	{
				2111	return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
				2112	}
				2113
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	2114	/// \brief Initializes all values in a 128-bit vector of [16 x i8] with the
				2115	/// specified 8-bit value.
				2116	///
				2117	/// \headerfile <x86intrin.h>
				2118	///
				2119	/// This intrinsic is a utility function and does not correspond to a specific
				2120	/// instruction.
				2121	///
				2122	/// \param __b
				2123	/// An 8-bit value used to initialize the elements of the destination integer
				2124	/// vector.
				2125	/// \returns An initialized 128-bit vector of [16 x i8] with all elements
				2126	/// containing the value provided in the operand.
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	2127	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2128	_mm_set1_epi8(char __b)
				2129	{
				2130	return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
				2131	}
				2132
				2133	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2134	_mm_setr_epi64(__m64 __q0, __m64 __q1)
				2135	{
				2136	return (__m128i){ (long long)__q0, (long long)__q1 };
				2137	}
				2138
				2139	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2140	_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
				2141	{
				2142	return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
				2143	}
				2144
				2145	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2146	_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
				2147	{
				2148	return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
				2149	}
				2150
				2151	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2152	_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
				2153	{
				2154	return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
				2155	}
				2156
				2157	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2158	_mm_setzero_si128(void)
				2159	{
				2160	return (__m128i){ 0LL, 0LL };
				2161	}
				2162
				2163	static __inline__ void __DEFAULT_FN_ATTRS
				2164	_mm_store_si128(__m128i *__p, __m128i __b)
				2165	{
				2166	*__p = __b;
				2167	}
				2168
				2169	static __inline__ void __DEFAULT_FN_ATTRS
				2170	_mm_storeu_si128(__m128i *__p, __m128i __b)
				2171	{
				2172	__builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
				2173	}
				2174
				2175	static __inline__ void __DEFAULT_FN_ATTRS
				2176	_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
				2177	{
				2178	__builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
				2179	}
				2180
				2181	static __inline__ void __DEFAULT_FN_ATTRS
				2182	_mm_storel_epi64(__m128i *__p, __m128i __a)
				2183	{
				2184	struct __mm_storel_epi64_struct {
				2185	long long __u;
				2186	} __attribute__((__packed__, __may_alias__));
				2187	((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
				2188	}
				2189
				2190	static __inline__ void __DEFAULT_FN_ATTRS
				2191	_mm_stream_pd(double *__p, __m128d __a)
				2192	{
				2193	__builtin_ia32_movntpd(__p, __a);
				2194	}
				2195
				2196	static __inline__ void __DEFAULT_FN_ATTRS
				2197	_mm_stream_si128(__m128i *__p, __m128i __a)
				2198	{
				2199	__builtin_ia32_movntdq(__p, __a);
				2200	}
				2201
				2202	static __inline__ void __DEFAULT_FN_ATTRS
				2203	_mm_stream_si32(int *__p, int __a)
				2204	{
				2205	__builtin_ia32_movnti(__p, __a);
				2206	}
				2207
				2208	#ifdef __x86_64__
				2209	static __inline__ void __DEFAULT_FN_ATTRS
				2210	_mm_stream_si64(long long *__p, long long __a)
				2211	{
				2212	__builtin_ia32_movnti64(__p, __a);
				2213	}
				2214	#endif
				2215
				2216	static __inline__ void __DEFAULT_FN_ATTRS
				2217	_mm_clflush(void const *__p)
				2218	{
				2219	__builtin_ia32_clflush(__p);
				2220	}
				2221
				2222	static __inline__ void __DEFAULT_FN_ATTRS
				2223	_mm_lfence(void)
				2224	{
				2225	__builtin_ia32_lfence();
				2226	}
				2227
				2228	static __inline__ void __DEFAULT_FN_ATTRS
				2229	_mm_mfence(void)
				2230	{
				2231	__builtin_ia32_mfence();
				2232	}
				2233
				2234	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2235	_mm_packs_epi16(__m128i __a, __m128i __b)
				2236	{
				2237	return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
				2238	}
				2239
				2240	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2241	_mm_packs_epi32(__m128i __a, __m128i __b)
				2242	{
				2243	return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
				2244	}
				2245
				2246	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2247	_mm_packus_epi16(__m128i __a, __m128i __b)
				2248	{
				2249	return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
				2250	}
				2251
				2252	static __inline__ int __DEFAULT_FN_ATTRS
				2253	_mm_extract_epi16(__m128i __a, int __imm)
				2254	{
				2255	__v8hi __b = (__v8hi)__a;
				2256	return (unsigned short)__b[__imm & 7];
				2257	}
				2258
				2259	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2260	_mm_insert_epi16(__m128i __a, int __b, int __imm)
				2261	{
				2262	__v8hi __c = (__v8hi)__a;
				2263	__c[__imm & 7] = __b;
				2264	return (__m128i)__c;
				2265	}
				2266
				2267	static __inline__ int __DEFAULT_FN_ATTRS
				2268	_mm_movemask_epi8(__m128i __a)
				2269	{
				2270	return __builtin_ia32_pmovmskb128((__v16qi)__a);
				2271	}
				2272
				2273	#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
				2274	(__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
				2275	(__v4si)_mm_setzero_si128(), \
				2276	(imm) & 0x3, ((imm) & 0xc) >> 2, \
				2277	((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
				2278
				/* Permutes the four low 16-bit lanes of `a` and passes lanes 4..7 through
				 * unchanged.  Each successive 2-bit field of `imm` selects the source lane
				 * (0..3) for result lanes 0..3.  The zero vector is a placeholder second
				 * operand that no selector references. */
				#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
				  (__m128i)__builtin_shufflevector( \
				      (__v8hi)(__m128i)(a), (__v8hi)_mm_setzero_si128(), \
				      ((imm) >> 0) & 0x3, \
				      ((imm) >> 2) & 0x3, \
				      ((imm) >> 4) & 0x3, \
				      ((imm) >> 6) & 0x3, \
				      4, 5, 6, 7); })
				2285
				/* Permutes the four high 16-bit lanes of `a` and passes lanes 0..3 through
				 * unchanged.  Each successive 2-bit field of `imm` is offset by 4 to select
				 * the source lane (4..7) for result lanes 4..7.  The zero vector is a
				 * placeholder second operand that no selector references. */
				#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
				  (__m128i)__builtin_shufflevector( \
				      (__v8hi)(__m128i)(a), (__v8hi)_mm_setzero_si128(), \
				      0, 1, 2, 3, \
				      4 + (((imm) >> 0) & 0x3), \
				      4 + (((imm) >> 2) & 0x3), \
				      4 + (((imm) >> 4) & 0x3), \
				      4 + (((imm) >> 6) & 0x3)); })
				2294
				2295	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2296	_mm_unpackhi_epi8(__m128i __a, __m128i __b)
				2297	{
				2298	return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
				2299	}
				2300
				2301	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2302	_mm_unpackhi_epi16(__m128i __a, __m128i __b)
				2303	{
				2304	return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
				2305	}
				2306
				2307	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2308	_mm_unpackhi_epi32(__m128i __a, __m128i __b)
				2309	{
				2310	return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
				2311	}
				2312
				2313	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2314	_mm_unpackhi_epi64(__m128i __a, __m128i __b)
				2315	{
				2316	return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
				2317	}
				2318
				2319	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2320	_mm_unpacklo_epi8(__m128i __a, __m128i __b)
				2321	{
				2322	return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
				2323	}
				2324
				2325	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2326	_mm_unpacklo_epi16(__m128i __a, __m128i __b)
				2327	{
				2328	return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
				2329	}
				2330
				2331	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2332	_mm_unpacklo_epi32(__m128i __a, __m128i __b)
				2333	{
				2334	return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
				2335	}
				2336
				2337	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2338	_mm_unpacklo_epi64(__m128i __a, __m128i __b)
				2339	{
				2340	return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
				2341	}
				2342
				2343	static __inline__ __m64 __DEFAULT_FN_ATTRS
				2344	_mm_movepi64_pi64(__m128i __a)
				2345	{
				2346	return (__m64)__a[0];
				2347	}
				2348
				2349	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2350	_mm_movpi64_epi64(__m64 __a)
				2351	{
				2352	return (__m128i){ (long long)__a, 0 };
				2353	}
				2354
				2355	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2356	_mm_move_epi64(__m128i __a)
				2357	{
				2358	return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
				2359	}
				2360
				2361	static __inline__ __m128d __DEFAULT_FN_ATTRS
				2362	_mm_unpackhi_pd(__m128d __a, __m128d __b)
				2363	{
				2364	return __builtin_shufflevector(__a, __b, 1, 2+1);
				2365	}
				2366
				2367	static __inline__ __m128d __DEFAULT_FN_ATTRS
				2368	_mm_unpacklo_pd(__m128d __a, __m128d __b)
				2369	{
				2370	return __builtin_shufflevector(__a, __b, 0, 2+0);
				2371	}
				2372
				2373	static __inline__ int __DEFAULT_FN_ATTRS
				2374	_mm_movemask_pd(__m128d __a)
				2375	{
				2376	return __builtin_ia32_movmskpd(__a);
				2377	}
				2378
				/* Selects one double from each operand.  Bit 0 of `i` picks the element of
				 * `a` placed in result element 0; bit 1 of `i` picks the element of `b`
				 * placed in result element 1 (shuffle indices 2 and 3 refer to `b`).  `i`
				 * is expanded twice and must be usable as a shuffle-index constant. */
				#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
				  (__m128d)__builtin_shufflevector( \
				      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
				      ((i) >> 0) & 1, \
				      2 + (((i) >> 1) & 1)); })
				2382
				2383	static __inline__ __m128 __DEFAULT_FN_ATTRS
				2384	_mm_castpd_ps(__m128d __a)
				2385	{
				2386	return (__m128)__a;
				2387	}
				2388
				2389	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2390	_mm_castpd_si128(__m128d __a)
				2391	{
				2392	return (__m128i)__a;
				2393	}
				2394
				2395	static __inline__ __m128d __DEFAULT_FN_ATTRS
				2396	_mm_castps_pd(__m128 __a)
				2397	{
				2398	return (__m128d)__a;
				2399	}
				2400
				2401	static __inline__ __m128i __DEFAULT_FN_ATTRS
				2402	_mm_castps_si128(__m128 __a)
				2403	{
				2404	return (__m128i)__a;
				2405	}
				2406
				2407	static __inline__ __m128 __DEFAULT_FN_ATTRS
				2408	_mm_castsi128_ps(__m128i __a)
				2409	{
				2410	return (__m128)__a;
				2411	}
				2412
				2413	static __inline__ __m128d __DEFAULT_FN_ATTRS
				2414	_mm_castsi128_pd(__m128i __a)
				2415	{
				2416	return (__m128d)__a;
				2417	}
				2418
				2419	static __inline__ void __DEFAULT_FN_ATTRS
				2420	_mm_pause(void)
				2421	{
				2422	__builtin_ia32_pause();
				2423	}
				2424
				2425	#undef __DEFAULT_FN_ATTRS
				2426
				/* Builds the 2-bit selector taken by _mm_shuffle_pd: x lands in bit 1 and
				 * y in bit 0.  The operator is a plain bitwise OR; the extracted text had a
				 * stray backslash in front of it, which is not valid C. */
				#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
				2428
				2429	#endif /* __EMMINTRIN_H */