Blame - darwin-x86/lib64/clang/14.0.2/include/tmmintrin.h - platform/prebuilts/clang-tools

blob: bcffa8187801c7e65e3131e5ac4b1a8594b015f3 [file] [log] [blame]

Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	1	/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
				2	*
Logan Chien	df4f766	2019-09-04 16:45:23 -0700	[diff] [blame]	3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	* See https://llvm.org/LICENSE.txt for license information.
				5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	6	*
				7	*===-----------------------------------------------------------------------===
				8	*/
				9
				10	#ifndef __TMMINTRIN_H
				11	#define __TMMINTRIN_H
				12
Pirama Arumuga Nainar	ec8c89d	2022-02-23 09:26:16 -0800	[diff] [blame]	13	#if !defined(__i386__) && !defined(__x86_64__)
				14	#error "This header is only meant to be used on x86 and x64 architecture"
				15	#endif
				16
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	17	#include <pmmintrin.h>
				18
				19	/* Define the default attributes for the functions in this file. */
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	20	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
				21	#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	22
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	23	/// Computes the absolute value of each of the packed 8-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	24	/// integers in the source operand and stores the 8-bit unsigned integer
				25	/// results in the destination.
				26	///
				27	/// \headerfile <x86intrin.h>
				28	///
				29	/// This intrinsic corresponds to the \c PABSB instruction.
				30	///
				31	/// \param __a
				32	/// A 64-bit vector of [8 x i8].
				33	/// \returns A 64-bit integer vector containing the absolute values of the
				34	/// elements in the operand.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	35	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	36	_mm_abs_pi8(__m64 __a)
				37	{
				38	return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
				39	}
				40
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	41	/// Computes the absolute value of each of the packed 8-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	42	/// integers in the source operand and stores the 8-bit unsigned integer
				43	/// results in the destination.
				44	///
				45	/// \headerfile <x86intrin.h>
				46	///
				47	/// This intrinsic corresponds to the \c VPABSB instruction.
				48	///
				49	/// \param __a
				50	/// A 128-bit vector of [16 x i8].
				51	/// \returns A 128-bit integer vector containing the absolute values of the
				52	/// elements in the operand.
				53	static __inline__ __m128i __DEFAULT_FN_ATTRS
				54	_mm_abs_epi8(__m128i __a)
				55	{
				56	return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
				57	}
				58
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	59	/// Computes the absolute value of each of the packed 16-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	60	/// integers in the source operand and stores the 16-bit unsigned integer
				61	/// results in the destination.
				62	///
				63	/// \headerfile <x86intrin.h>
				64	///
				65	/// This intrinsic corresponds to the \c PABSW instruction.
				66	///
				67	/// \param __a
				68	/// A 64-bit vector of [4 x i16].
				69	/// \returns A 64-bit integer vector containing the absolute values of the
				70	/// elements in the operand.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	71	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	72	_mm_abs_pi16(__m64 __a)
				73	{
				74	return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
				75	}
				76
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	77	/// Computes the absolute value of each of the packed 16-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	78	/// integers in the source operand and stores the 16-bit unsigned integer
				79	/// results in the destination.
				80	///
				81	/// \headerfile <x86intrin.h>
				82	///
				83	/// This intrinsic corresponds to the \c VPABSW instruction.
				84	///
				85	/// \param __a
				86	/// A 128-bit vector of [8 x i16].
				87	/// \returns A 128-bit integer vector containing the absolute values of the
				88	/// elements in the operand.
				89	static __inline__ __m128i __DEFAULT_FN_ATTRS
				90	_mm_abs_epi16(__m128i __a)
				91	{
				92	return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
				93	}
				94
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	95	/// Computes the absolute value of each of the packed 32-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	96	/// integers in the source operand and stores the 32-bit unsigned integer
				97	/// results in the destination.
				98	///
				99	/// \headerfile <x86intrin.h>
				100	///
				101	/// This intrinsic corresponds to the \c PABSD instruction.
				102	///
				103	/// \param __a
				104	/// A 64-bit vector of [2 x i32].
				105	/// \returns A 64-bit integer vector containing the absolute values of the
				106	/// elements in the operand.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	107	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	108	_mm_abs_pi32(__m64 __a)
				109	{
				110	return (__m64)__builtin_ia32_pabsd((__v2si)__a);
				111	}
				112
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	113	/// Computes the absolute value of each of the packed 32-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	114	/// integers in the source operand and stores the 32-bit unsigned integer
				115	/// results in the destination.
				116	///
				117	/// \headerfile <x86intrin.h>
				118	///
				119	/// This intrinsic corresponds to the \c VPABSD instruction.
				120	///
				121	/// \param __a
				122	/// A 128-bit vector of [4 x i32].
				123	/// \returns A 128-bit integer vector containing the absolute values of the
				124	/// elements in the operand.
				125	static __inline__ __m128i __DEFAULT_FN_ATTRS
				126	_mm_abs_epi32(__m128i __a)
				127	{
				128	return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
				129	}
				130
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	131	/// Concatenates the two 128-bit integer vector operands, and
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	132	/// right-shifts the result by the number of bytes specified in the immediate
				133	/// operand.
				134	///
				135	/// \headerfile <x86intrin.h>
				136	///
				137	/// \code
				138	/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
				139	/// \endcode
				140	///
				141	/// This intrinsic corresponds to the \c PALIGNR instruction.
				142	///
				143	/// \param a
				144	/// A 128-bit vector of [16 x i8] forming the upper half of the concatenation.
				145	/// \param b
				146	/// A 128-bit vector of [16 x i8] forming the lower half of the concatenation.
				147	/// \param n
				148	/// An immediate operand specifying how many bytes to right-shift the result.
				149	/// \returns A 128-bit integer vector containing the concatenated right-shifted
				150	/// value.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	151	#define _mm_alignr_epi8(a, b, n) \
Pirama Arumuga Nainar	494f645	2021-12-02 10:42:14 -0800	[diff] [blame]	152	((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
				153	(__v16qi)(__m128i)(b), (n)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	154
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	155	/// Concatenates the two 64-bit integer vector operands, and right-shifts
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	156	/// the result by the number of bytes specified in the immediate operand.
				157	///
				158	/// \headerfile <x86intrin.h>
				159	///
				160	/// \code
				161	/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
				162	/// \endcode
				163	///
				164	/// This intrinsic corresponds to the \c PALIGNR instruction.
				165	///
				166	/// \param a
				167	/// A 64-bit vector of [8 x i8] forming the upper half of the concatenation.
				168	/// \param b
				169	/// A 64-bit vector of [8 x i8] forming the lower half of the concatenation.
				170	/// \param n
				171	/// An immediate operand specifying how many bytes to right-shift the result.
				172	/// \returns A 64-bit integer vector containing the concatenated right-shifted
				173	/// value.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	174	#define _mm_alignr_pi8(a, b, n) \
Pirama Arumuga Nainar	494f645	2021-12-02 10:42:14 -0800	[diff] [blame]	175	((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	176
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	177	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	178	/// 128-bit vectors of [8 x i16].
				179	///
				180	/// \headerfile <x86intrin.h>
				181	///
				182	/// This intrinsic corresponds to the \c VPHADDW instruction.
				183	///
				184	/// \param __a
				185	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				186	/// horizontal sums of the values are stored in the lower bits of the
				187	/// destination.
				188	/// \param __b
				189	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				190	/// horizontal sums of the values are stored in the upper bits of the
				191	/// destination.
				192	/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
				193	/// both operands.
				194	static __inline__ __m128i __DEFAULT_FN_ATTRS
				195	_mm_hadd_epi16(__m128i __a, __m128i __b)
				196	{
				197	return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
				198	}
				199
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	200	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	201	/// 128-bit vectors of [4 x i32].
				202	///
				203	/// \headerfile <x86intrin.h>
				204	///
				205	/// This intrinsic corresponds to the \c VPHADDD instruction.
				206	///
				207	/// \param __a
				208	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				209	/// horizontal sums of the values are stored in the lower bits of the
				210	/// destination.
				211	/// \param __b
				212	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				213	/// horizontal sums of the values are stored in the upper bits of the
				214	/// destination.
				215	/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
				216	/// both operands.
				217	static __inline__ __m128i __DEFAULT_FN_ATTRS
				218	_mm_hadd_epi32(__m128i __a, __m128i __b)
				219	{
				220	return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
				221	}
				222
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	223	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	224	/// 64-bit vectors of [4 x i16].
				225	///
				226	/// \headerfile <x86intrin.h>
				227	///
				228	/// This intrinsic corresponds to the \c PHADDW instruction.
				229	///
				230	/// \param __a
				231	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				232	/// horizontal sums of the values are stored in the lower bits of the
				233	/// destination.
				234	/// \param __b
				235	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				236	/// horizontal sums of the values are stored in the upper bits of the
				237	/// destination.
				238	/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
				239	/// operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	240	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	241	_mm_hadd_pi16(__m64 __a, __m64 __b)
				242	{
				243	return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
				244	}
				245
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	246	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	247	/// 64-bit vectors of [2 x i32].
				248	///
				249	/// \headerfile <x86intrin.h>
				250	///
				251	/// This intrinsic corresponds to the \c PHADDD instruction.
				252	///
				253	/// \param __a
				254	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				255	/// horizontal sums of the values are stored in the lower bits of the
				256	/// destination.
				257	/// \param __b
				258	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				259	/// horizontal sums of the values are stored in the upper bits of the
				260	/// destination.
				261	/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
				262	/// operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	263	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	264	_mm_hadd_pi32(__m64 __a, __m64 __b)
				265	{
				266	return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
				267	}
				268
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	269	/// Horizontally adds the adjacent pairs of values contained in 2 packed
				270	/// 128-bit vectors of [8 x i16]. Sums greater than 32767 (0x7FFF) are
				271	/// saturated to 0x7FFF. Sums less than -32768 (0x8000) are saturated to
				272	/// 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	273	///
				274	/// \headerfile <x86intrin.h>
				275	///
				276	/// This intrinsic corresponds to the \c VPHADDSW instruction.
				277	///
				278	/// \param __a
				279	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				280	/// horizontal sums of the values are stored in the lower bits of the
				281	/// destination.
				282	/// \param __b
				283	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				284	/// horizontal sums of the values are stored in the upper bits of the
				285	/// destination.
				286	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
				287	/// sums of both operands.
				288	static __inline__ __m128i __DEFAULT_FN_ATTRS
				289	_mm_hadds_epi16(__m128i __a, __m128i __b)
				290	{
				291	return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
				292	}
				293
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	294	/// Horizontally adds the adjacent pairs of values contained in 2 packed
				295	/// 64-bit vectors of [4 x i16]. Sums greater than 32767 (0x7FFF) are
				296	/// saturated to 0x7FFF. Sums less than -32768 (0x8000) are saturated to
				297	/// 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	298	///
				299	/// \headerfile <x86intrin.h>
				300	///
				301	/// This intrinsic corresponds to the \c PHADDSW instruction.
				302	///
				303	/// \param __a
				304	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				305	/// horizontal sums of the values are stored in the lower bits of the
				306	/// destination.
				307	/// \param __b
				308	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				309	/// horizontal sums of the values are stored in the upper bits of the
				310	/// destination.
				311	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
				312	/// sums of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	313	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	314	_mm_hadds_pi16(__m64 __a, __m64 __b)
				315	{
				316	return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
				317	}
				318
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	319	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	320	/// packed 128-bit vectors of [8 x i16].
				321	///
				322	/// \headerfile <x86intrin.h>
				323	///
				324	/// This intrinsic corresponds to the \c VPHSUBW instruction.
				325	///
				326	/// \param __a
				327	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				328	/// horizontal differences between the values are stored in the lower bits of
				329	/// the destination.
				330	/// \param __b
				331	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				332	/// horizontal differences between the values are stored in the upper bits of
				333	/// the destination.
				334	/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
				335	/// of both operands.
				336	static __inline__ __m128i __DEFAULT_FN_ATTRS
				337	_mm_hsub_epi16(__m128i __a, __m128i __b)
				338	{
				339	return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
				340	}
				341
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	342	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	343	/// packed 128-bit vectors of [4 x i32].
				344	///
				345	/// \headerfile <x86intrin.h>
				346	///
				347	/// This intrinsic corresponds to the \c VPHSUBD instruction.
				348	///
				349	/// \param __a
				350	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				351	/// horizontal differences between the values are stored in the lower bits of
				352	/// the destination.
				353	/// \param __b
				354	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				355	/// horizontal differences between the values are stored in the upper bits of
				356	/// the destination.
				357	/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
				358	/// of both operands.
				359	static __inline__ __m128i __DEFAULT_FN_ATTRS
				360	_mm_hsub_epi32(__m128i __a, __m128i __b)
				361	{
				362	return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
				363	}
				364
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	365	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	366	/// packed 64-bit vectors of [4 x i16].
				367	///
				368	/// \headerfile <x86intrin.h>
				369	///
				370	/// This intrinsic corresponds to the \c PHSUBW instruction.
				371	///
				372	/// \param __a
				373	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				374	/// horizontal differences between the values are stored in the lower bits of
				375	/// the destination.
				376	/// \param __b
				377	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				378	/// horizontal differences between the values are stored in the upper bits of
				379	/// the destination.
				380	/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
				381	/// of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	382	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	383	_mm_hsub_pi16(__m64 __a, __m64 __b)
				384	{
				385	return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
				386	}
				387
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	388	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	389	/// packed 64-bit vectors of [2 x i32].
				390	///
				391	/// \headerfile <x86intrin.h>
				392	///
				393	/// This intrinsic corresponds to the \c PHSUBD instruction.
				394	///
				395	/// \param __a
				396	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				397	/// horizontal differences between the values are stored in the lower bits of
				398	/// the destination.
				399	/// \param __b
				400	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				401	/// horizontal differences between the values are stored in the upper bits of
				402	/// the destination.
				403	/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
				404	/// of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	405	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	406	_mm_hsub_pi32(__m64 __a, __m64 __b)
				407	{
				408	return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
				409	}
				410
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	411	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	412	/// packed 128-bit vectors of [8 x i16]. Differences greater than 32767
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	413	/// (0x7FFF) are saturated to 0x7FFF. Differences less than -32768 (0x8000) are
				414	/// saturated to 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	415	///
				416	/// \headerfile <x86intrin.h>
				417	///
				418	/// This intrinsic corresponds to the \c VPHSUBSW instruction.
				419	///
				420	/// \param __a
				421	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				422	/// horizontal differences between the values are stored in the lower bits of
				423	/// the destination.
				424	/// \param __b
				425	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				426	/// horizontal differences between the values are stored in the upper bits of
				427	/// the destination.
				428	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
				429	/// differences of both operands.
				430	static __inline__ __m128i __DEFAULT_FN_ATTRS
				431	_mm_hsubs_epi16(__m128i __a, __m128i __b)
				432	{
				433	return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
				434	}
				435
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	436	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	437	/// packed 64-bit vectors of [4 x i16]. Differences greater than 32767
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	438	/// (0x7FFF) are saturated to 0x7FFF. Differences less than -32768 (0x8000) are
				439	/// saturated to 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	440	///
				441	/// \headerfile <x86intrin.h>
				442	///
				443	/// This intrinsic corresponds to the \c PHSUBSW instruction.
				444	///
				445	/// \param __a
				446	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				447	/// horizontal differences between the values are stored in the lower bits of
				448	/// the destination.
				449	/// \param __b
				450	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				451	/// horizontal differences between the values are stored in the upper bits of
				452	/// the destination.
				453	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
				454	/// differences of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	455	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	456	_mm_hsubs_pi16(__m64 __a, __m64 __b)
				457	{
				458	return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
				459	}
				460
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	461	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	462	/// values contained in the first source operand and packed 8-bit signed
				463	/// integer values contained in the second source operand, adds pairs of
				464	/// contiguous products with signed saturation, and writes the 16-bit sums to
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	465	/// the corresponding bits in the destination.
				466	///
				467	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
				468	/// both operands are multiplied, and the sum of both results is written to
				469	/// bits [15:0] of the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	470	///
				471	/// \headerfile <x86intrin.h>
				472	///
				473	/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
				474	///
				475	/// \param __a
				476	/// A 128-bit integer vector containing the first source operand.
				477	/// \param __b
				478	/// A 128-bit integer vector containing the second source operand.
				479	/// \returns A 128-bit integer vector containing the sums of products of both
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	480	/// operands: \n
				481	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
				482	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
				483	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
				484	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
				485	/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
				486	/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
				487	/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
				488	/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	489	static __inline__ __m128i __DEFAULT_FN_ATTRS
				490	_mm_maddubs_epi16(__m128i __a, __m128i __b)
				491	{
				492	return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
				493	}
				494
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	495	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	496	/// values contained in the first source operand and packed 8-bit signed
				497	/// integer values contained in the second source operand, adds pairs of
				498	/// contiguous products with signed saturation, and writes the 16-bit sums to
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	499	/// the corresponding bits in the destination.
				500	///
				501	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
				502	/// both operands are multiplied, and the sum of both results is written to
				503	/// bits [15:0] of the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	504	///
				505	/// \headerfile <x86intrin.h>
				506	///
				507	/// This intrinsic corresponds to the \c PMADDUBSW instruction.
				508	///
				509	/// \param __a
				510	/// A 64-bit integer vector containing the first source operand.
				511	/// \param __b
				512	/// A 64-bit integer vector containing the second source operand.
				513	/// \returns A 64-bit integer vector containing the sums of products of both
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	514	/// operands: \n
				515	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
				516	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
				517	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
				518	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
				519	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	520	_mm_maddubs_pi16(__m64 __a, __m64 __b)
				521	{
				522	return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
				523	}
				524
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	525	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	526	/// products to the 18 most significant bits by right-shifting, rounds the
				527	/// truncated value by adding 1, and writes bits [16:1] to the destination.
				528	///
				529	/// \headerfile <x86intrin.h>
				530	///
				531	/// This intrinsic corresponds to the \c VPMULHRSW instruction.
				532	///
				533	/// \param __a
				534	/// A 128-bit vector of [8 x i16] containing one of the source operands.
				535	/// \param __b
				536	/// A 128-bit vector of [8 x i16] containing one of the source operands.
				537	/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
				538	/// products of both operands.
				539	static __inline__ __m128i __DEFAULT_FN_ATTRS
				540	_mm_mulhrs_epi16(__m128i __a, __m128i __b)
				541	{
				542	return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
				543	}
				544
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	545	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	546	/// products to the 18 most significant bits by right-shifting, rounds the
				547	/// truncated value by adding 1, and writes bits [16:1] to the destination.
				548	///
				549	/// \headerfile <x86intrin.h>
				550	///
				551	/// This intrinsic corresponds to the \c PMULHRSW instruction.
				552	///
				553	/// \param __a
				554	/// A 64-bit vector of [4 x i16] containing one of the source operands.
				555	/// \param __b
				556	/// A 64-bit vector of [4 x i16] containing one of the source operands.
				557	/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
				558	/// products of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	559	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	560	_mm_mulhrs_pi16(__m64 __a, __m64 __b)
				561	{
				562	return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
				563	}
				564
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	565	/// Copies selected 8-bit integers from a 128-bit integer vector to the
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	566	/// destination or clears 8-bit values in the destination, as specified by
				567	/// the second source operand.
				568	///
				569	/// \headerfile <x86intrin.h>
				570	///
				571	/// This intrinsic corresponds to the \c VPSHUFB instruction.
				572	///
				573	/// \param __a
				574	/// A 128-bit integer vector of [16 x i8] containing the values to be copied.
				575	/// \param __b
				576	/// A 128-bit integer vector of [16 x i8] containing control bytes, one for
				577	/// each byte position in the destination:
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	578	/// Bit 7: \n
				579	/// 1: Clear the corresponding byte in the destination. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	580	/// 0: Copy the selected source byte to the corresponding byte in the
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	581	/// destination. \n
				582	/// Bits [6:4] Reserved. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	583	/// Bits [3:0] select the index (0 to 15) of the byte in \a __a to be copied.
				584	/// \returns A 128-bit integer vector containing the copied or cleared values.
				585	static __inline__ __m128i __DEFAULT_FN_ATTRS
				586	_mm_shuffle_epi8(__m128i __a, __m128i __b)
				587	{
				588	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
				589	}
				590
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	591	/// Copies selected 8-bit integers from a 64-bit integer vector to the
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	592	/// destination or clears 8-bit values in the destination, as specified by
				593	/// the second source operand.
				594	///
				595	/// \headerfile <x86intrin.h>
				596	///
				597	/// This intrinsic corresponds to the \c PSHUFB instruction.
				598	///
				599	/// \param __a
				600	/// A 64-bit integer vector of [8 x i8] containing the values to be copied.
				601	/// \param __b
				602	/// A 64-bit integer vector of [8 x i8] containing control bytes, one for
				603	/// each byte position in the destination:
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	604	/// Bit 7: \n
				605	/// 1: Clear the corresponding byte in the destination. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	606	/// 0: Copy the selected source byte to the corresponding byte in the
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	607	/// destination. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	608	/// Bits [2:0] select the index (0 to 7) of the byte in \a __a to be copied; the 64-bit source has only eight bytes, so bits [6:3] are not used.
				609	/// \returns A 64-bit integer vector containing the copied or cleared values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	610	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	611	_mm_shuffle_pi8(__m64 __a, __m64 __b)
				612	{
				613	return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
				614	}
				615
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	616	/// For each 8-bit integer in the first source operand, perform one of
				617	/// the following actions as specified by the second source operand.
				618	///
				619	/// If the byte in the second source is negative, calculate the two's
				620	/// complement of the corresponding byte in the first source, and write that
				621	/// value to the destination. If the byte in the second source is positive,
				622	/// copy the corresponding byte from the first source to the destination. If
				623	/// the byte in the second source is zero, clear the corresponding byte in
				624	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	625	///
				626	/// \headerfile <x86intrin.h>
				627	///
				628	/// This intrinsic corresponds to the \c VPSIGNB instruction.
				629	///
				630	/// \param __a
				631	/// A 128-bit integer vector of [16 x i8] containing the values to be copied, negated, or cleared.
				632	/// \param __b
				633	/// A 128-bit integer vector of [16 x i8] containing control bytes corresponding to
				634	/// positions in the destination.
				635	/// \returns A 128-bit integer vector containing the resultant values.
				636	static __inline__ __m128i __DEFAULT_FN_ATTRS
				637	_mm_sign_epi8(__m128i __a, __m128i __b)
				638	{
				639	return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
				640	}
				641
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	642	/// For each 16-bit integer in the first source operand, perform one of
				643	/// the following actions as specified by the second source operand.
				644	///
				645	/// If the word in the second source is negative, calculate the two's
				646	/// complement of the corresponding word in the first source, and write that
				647	/// value to the destination. If the word in the second source is positive,
				648	/// copy the corresponding word from the first source to the destination. If
				649	/// the word in the second source is zero, clear the corresponding word in
				650	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	651	///
				652	/// \headerfile <x86intrin.h>
				653	///
				654	/// This intrinsic corresponds to the \c VPSIGNW instruction.
				655	///
				656	/// \param __a
				657	/// A 128-bit integer vector of [8 x i16] containing the values to be copied, negated, or cleared.
				658	/// \param __b
				659	/// A 128-bit integer vector of [8 x i16] containing control words corresponding to
				660	/// positions in the destination.
				661	/// \returns A 128-bit integer vector containing the resultant values.
				662	static __inline__ __m128i __DEFAULT_FN_ATTRS
				663	_mm_sign_epi16(__m128i __a, __m128i __b)
				664	{
				665	return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
				666	}
				667
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	668	/// For each 32-bit integer in the first source operand, perform one of
				669	/// the following actions as specified by the second source operand.
				670	///
				671	/// If the doubleword in the second source is negative, calculate the two's
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	672	/// complement of the corresponding doubleword in the first source, and
				673	/// write that value to the destination. If the doubleword in the second
				674	/// source is positive, copy the corresponding doubleword from the first
				675	/// source to the destination. If the doubleword in the second source is
				676	/// zero, clear the corresponding doubleword in the destination.
				677	///
				678	/// \headerfile <x86intrin.h>
				679	///
				680	/// This intrinsic corresponds to the \c VPSIGND instruction.
				681	///
				682	/// \param __a
				683	/// A 128-bit integer vector of [4 x i32] containing the values to be copied, negated, or cleared.
				684	/// \param __b
				685	/// A 128-bit integer vector of [4 x i32] containing control doublewords corresponding to
				686	/// positions in the destination.
				687	/// \returns A 128-bit integer vector containing the resultant values.
				688	static __inline__ __m128i __DEFAULT_FN_ATTRS
				689	_mm_sign_epi32(__m128i __a, __m128i __b)
				690	{
				691	return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
				692	}
				693
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	694	/// For each 8-bit integer in the first source operand, perform one of
				695	/// the following actions as specified by the second source operand.
				696	///
				697	/// If the byte in the second source is negative, calculate the two's
				698	/// complement of the corresponding byte in the first source, and write that
				699	/// value to the destination. If the byte in the second source is positive,
				700	/// copy the corresponding byte from the first source to the destination. If
				701	/// the byte in the second source is zero, clear the corresponding byte in
				702	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	703	///
				704	/// \headerfile <x86intrin.h>
				705	///
				706	/// This intrinsic corresponds to the \c PSIGNB instruction.
				707	///
				708	/// \param __a
				709	/// A 64-bit integer vector of [8 x i8] containing the values to be copied, negated, or cleared.
				710	/// \param __b
				711	/// A 64-bit integer vector of [8 x i8] containing control bytes corresponding to
				712	/// positions in the destination.
				713	/// \returns A 64-bit integer vector containing the resultant values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	714	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	715	_mm_sign_pi8(__m64 __a, __m64 __b)
				716	{
				717	return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
				718	}
				719
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	720	/// For each 16-bit integer in the first source operand, perform one of
				721	/// the following actions as specified by the second source operand.
				722	///
				723	/// If the word in the second source is negative, calculate the two's
				724	/// complement of the corresponding word in the first source, and write that
				725	/// value to the destination. If the word in the second source is positive,
				726	/// copy the corresponding word from the first source to the destination. If
				727	/// the word in the second source is zero, clear the corresponding word in
				728	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	729	///
				730	/// \headerfile <x86intrin.h>
				731	///
				732	/// This intrinsic corresponds to the \c PSIGNW instruction.
				733	///
				734	/// \param __a
				735	/// A 64-bit integer vector of [4 x i16] containing the values to be copied, negated, or cleared.
				736	/// \param __b
				737	/// A 64-bit integer vector of [4 x i16] containing control words corresponding to
				738	/// positions in the destination.
				739	/// \returns A 64-bit integer vector containing the resultant values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	740	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	741	_mm_sign_pi16(__m64 __a, __m64 __b)
				742	{
				743	return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
				744	}
				745
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	746	/// For each 32-bit integer in the first source operand, perform one of
				747	/// the following actions as specified by the second source operand.
				748	///
				749	/// If the doubleword in the second source is negative, calculate the two's
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	750	/// complement of the corresponding doubleword in the first source, and
				751	/// write that value to the destination. If the doubleword in the second
				752	/// source is positive, copy the corresponding doubleword from the first
				753	/// source to the destination. If the doubleword in the second source is
				754	/// zero, clear the corresponding doubleword in the destination.
				755	///
				756	/// \headerfile <x86intrin.h>
				757	///
				758	/// This intrinsic corresponds to the \c PSIGND instruction.
				759	///
				760	/// \param __a
				761	/// A 64-bit integer vector of [2 x i32] containing the values to be copied, negated, or cleared.
				762	/// \param __b
				763	/// A 64-bit integer vector of [2 x i32] containing two control doublewords corresponding
				764	/// to positions in the destination.
				765	/// \returns A 64-bit integer vector containing the resultant values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	766	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	767	_mm_sign_pi32(__m64 __a, __m64 __b)
				768	{
				769	return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
				770	}
				771
				772	#undef __DEFAULT_FN_ATTRS
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	773	#undef __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	774
				775	#endif /* __TMMINTRIN_H */