Blame - linux-x86/lib64/clang/14.0.0/include/tmmintrin.h - platform/prebuilts/clang-tools

blob: dbd959d0a62cb3d8f65a3786b60e657c0f4fb2a0 [file] [log] [blame]

Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	1	/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
				2	*
Logan Chien	df4f766	2019-09-04 16:45:23 -0700	[diff] [blame]	3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	* See https://llvm.org/LICENSE.txt for license information.
				5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	6	*
				7	*===-----------------------------------------------------------------------===
				8	*/
				9
				10	#ifndef __TMMINTRIN_H
				11	#define __TMMINTRIN_H
				12
				13	#include <pmmintrin.h>
				14
				15	/* Define the default attributes for the functions in this file. */
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	16	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
				17	#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	18
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	19	/// Computes the absolute value of each of the packed 8-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	20	/// integers in the source operand and stores the 8-bit unsigned integer
				21	/// results in the destination.
				22	///
				23	/// \headerfile <x86intrin.h>
				24	///
				25	/// This intrinsic corresponds to the \c PABSB instruction.
				26	///
				27	/// \param __a
				28	/// A 64-bit vector of [8 x i8].
				29	/// \returns A 64-bit integer vector containing the absolute values of the
				30	/// elements in the operand.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	31	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	32	_mm_abs_pi8(__m64 __a)
				33	{
				34	return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
				35	}
				36
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	37	/// Computes the absolute value of each of the packed 8-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	38	/// integers in the source operand and stores the 8-bit unsigned integer
				39	/// results in the destination.
				40	///
				41	/// \headerfile <x86intrin.h>
				42	///
				43	/// This intrinsic corresponds to the \c VPABSB instruction.
				44	///
				45	/// \param __a
				46	/// A 128-bit vector of [16 x i8].
				47	/// \returns A 128-bit integer vector containing the absolute values of the
				48	/// elements in the operand.
				49	static __inline__ __m128i __DEFAULT_FN_ATTRS
				50	_mm_abs_epi8(__m128i __a)
				51	{
				52	return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
				53	}
				54
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	55	/// Computes the absolute value of each of the packed 16-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	56	/// integers in the source operand and stores the 16-bit unsigned integer
				57	/// results in the destination.
				58	///
				59	/// \headerfile <x86intrin.h>
				60	///
				61	/// This intrinsic corresponds to the \c PABSW instruction.
				62	///
				63	/// \param __a
				64	/// A 64-bit vector of [4 x i16].
				65	/// \returns A 64-bit integer vector containing the absolute values of the
				66	/// elements in the operand.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	67	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	68	_mm_abs_pi16(__m64 __a)
				69	{
				70	return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
				71	}
				72
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	73	/// Computes the absolute value of each of the packed 16-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	74	/// integers in the source operand and stores the 16-bit unsigned integer
				75	/// results in the destination.
				76	///
				77	/// \headerfile <x86intrin.h>
				78	///
				79	/// This intrinsic corresponds to the \c VPABSW instruction.
				80	///
				81	/// \param __a
				82	/// A 128-bit vector of [8 x i16].
				83	/// \returns A 128-bit integer vector containing the absolute values of the
				84	/// elements in the operand.
				85	static __inline__ __m128i __DEFAULT_FN_ATTRS
				86	_mm_abs_epi16(__m128i __a)
				87	{
				88	return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
				89	}
				90
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	91	/// Computes the absolute value of each of the packed 32-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	92	/// integers in the source operand and stores the 32-bit unsigned integer
				93	/// results in the destination.
				94	///
				95	/// \headerfile <x86intrin.h>
				96	///
				97	/// This intrinsic corresponds to the \c PABSD instruction.
				98	///
				99	/// \param __a
				100	/// A 64-bit vector of [2 x i32].
				101	/// \returns A 64-bit integer vector containing the absolute values of the
				102	/// elements in the operand.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	103	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	104	_mm_abs_pi32(__m64 __a)
				105	{
				106	return (__m64)__builtin_ia32_pabsd((__v2si)__a);
				107	}
				108
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	109	/// Computes the absolute value of each of the packed 32-bit signed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	110	/// integers in the source operand and stores the 32-bit unsigned integer
				111	/// results in the destination.
				112	///
				113	/// \headerfile <x86intrin.h>
				114	///
				115	/// This intrinsic corresponds to the \c VPABSD instruction.
				116	///
				117	/// \param __a
				118	/// A 128-bit vector of [4 x i32].
				119	/// \returns A 128-bit integer vector containing the absolute values of the
				120	/// elements in the operand.
				121	static __inline__ __m128i __DEFAULT_FN_ATTRS
				122	_mm_abs_epi32(__m128i __a)
				123	{
				124	return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
				125	}
				126
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	127	/// Concatenates the two 128-bit integer vector operands, and
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	128	/// right-shifts the result by the number of bytes specified in the immediate
				129	/// operand.
				130	///
				131	/// \headerfile <x86intrin.h>
				132	///
				133	/// \code
				134	/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
				135	/// \endcode
				136	///
				137	/// This intrinsic corresponds to the \c PALIGNR instruction.
				138	///
				139	/// \param a
				140	/// A 128-bit vector of [16 x i8] that forms the upper 128 bits of the concatenated value.
				141	/// \param b
				142	/// A 128-bit vector of [16 x i8] that forms the lower 128 bits of the concatenated value.
				143	/// \param n
				144	/// An immediate operand specifying how many bytes to right-shift the result.
				145	/// \returns A 128-bit integer vector containing the concatenated right-shifted
				146	/// value.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	147	#define _mm_alignr_epi8(a, b, n) \
Pirama Arumuga Nainar	494f645	2021-12-02 10:42:14 -0800	[diff] [blame]	148	((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
				149	(__v16qi)(__m128i)(b), (n)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	150
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	151	/// Concatenates the two 64-bit integer vector operands, and right-shifts
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	152	/// the result by the number of bytes specified in the immediate operand.
				153	///
				154	/// \headerfile <x86intrin.h>
				155	///
				156	/// \code
				157	/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
				158	/// \endcode
				159	///
				160	/// This intrinsic corresponds to the \c PALIGNR instruction.
				161	///
				162	/// \param a
				163	/// A 64-bit vector of [8 x i8] that forms the upper 64 bits of the concatenated value.
				164	/// \param b
				165	/// A 64-bit vector of [8 x i8] that forms the lower 64 bits of the concatenated value.
				166	/// \param n
				167	/// An immediate operand specifying how many bytes to right-shift the result.
				168	/// \returns A 64-bit integer vector containing the concatenated right-shifted
				169	/// value.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	170	#define _mm_alignr_pi8(a, b, n) \
Pirama Arumuga Nainar	494f645	2021-12-02 10:42:14 -0800	[diff] [blame]	171	((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	172
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	173	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	174	/// 128-bit vectors of [8 x i16].
				175	///
				176	/// \headerfile <x86intrin.h>
				177	///
				178	/// This intrinsic corresponds to the \c VPHADDW instruction.
				179	///
				180	/// \param __a
				181	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				182	/// horizontal sums of the values are stored in the lower bits of the
				183	/// destination.
				184	/// \param __b
				185	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				186	/// horizontal sums of the values are stored in the upper bits of the
				187	/// destination.
				188	/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
				189	/// both operands.
				190	static __inline__ __m128i __DEFAULT_FN_ATTRS
				191	_mm_hadd_epi16(__m128i __a, __m128i __b)
				192	{
				193	return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
				194	}
				195
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	196	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	197	/// 128-bit vectors of [4 x i32].
				198	///
				199	/// \headerfile <x86intrin.h>
				200	///
				201	/// This intrinsic corresponds to the \c VPHADDD instruction.
				202	///
				203	/// \param __a
				204	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				205	/// horizontal sums of the values are stored in the lower bits of the
				206	/// destination.
				207	/// \param __b
				208	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				209	/// horizontal sums of the values are stored in the upper bits of the
				210	/// destination.
				211	/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
				212	/// both operands.
				213	static __inline__ __m128i __DEFAULT_FN_ATTRS
				214	_mm_hadd_epi32(__m128i __a, __m128i __b)
				215	{
				216	return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
				217	}
				218
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	219	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	220	/// 64-bit vectors of [4 x i16].
				221	///
				222	/// \headerfile <x86intrin.h>
				223	///
				224	/// This intrinsic corresponds to the \c PHADDW instruction.
				225	///
				226	/// \param __a
				227	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				228	/// horizontal sums of the values are stored in the lower bits of the
				229	/// destination.
				230	/// \param __b
				231	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				232	/// horizontal sums of the values are stored in the upper bits of the
				233	/// destination.
				234	/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
				235	/// operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	236	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	237	_mm_hadd_pi16(__m64 __a, __m64 __b)
				238	{
				239	return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
				240	}
				241
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	242	/// Horizontally adds the adjacent pairs of values contained in 2 packed
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	243	/// 64-bit vectors of [2 x i32].
				244	///
				245	/// \headerfile <x86intrin.h>
				246	///
				247	/// This intrinsic corresponds to the \c PHADDD instruction.
				248	///
				249	/// \param __a
				250	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				251	/// horizontal sums of the values are stored in the lower bits of the
				252	/// destination.
				253	/// \param __b
				254	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				255	/// horizontal sums of the values are stored in the upper bits of the
				256	/// destination.
				257	/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
				258	/// operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	259	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	260	_mm_hadd_pi32(__m64 __a, __m64 __b)
				261	{
				262	return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
				263	}
				264
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	265	/// Horizontally adds the adjacent pairs of values contained in 2 packed
				266	/// 128-bit vectors of [8 x i16]. Sums greater than 32767 (0x7FFF) are
				267	/// saturated to 0x7FFF. Sums less than -32768 (0x8000) are saturated to
				268	/// 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	269	///
				270	/// \headerfile <x86intrin.h>
				271	///
				272	/// This intrinsic corresponds to the \c VPHADDSW instruction.
				273	///
				274	/// \param __a
				275	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				276	/// horizontal sums of the values are stored in the lower bits of the
				277	/// destination.
				278	/// \param __b
				279	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				280	/// horizontal sums of the values are stored in the upper bits of the
				281	/// destination.
				282	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
				283	/// sums of both operands.
				284	static __inline__ __m128i __DEFAULT_FN_ATTRS
				285	_mm_hadds_epi16(__m128i __a, __m128i __b)
				286	{
				287	return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
				288	}
				289
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	290	/// Horizontally adds the adjacent pairs of values contained in 2 packed
				291	/// 64-bit vectors of [4 x i16]. Sums greater than 32767 (0x7FFF) are
				292	/// saturated to 0x7FFF. Sums less than -32768 (0x8000) are saturated to
				293	/// 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	294	///
				295	/// \headerfile <x86intrin.h>
				296	///
				297	/// This intrinsic corresponds to the \c PHADDSW instruction.
				298	///
				299	/// \param __a
				300	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				301	/// horizontal sums of the values are stored in the lower bits of the
				302	/// destination.
				303	/// \param __b
				304	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				305	/// horizontal sums of the values are stored in the upper bits of the
				306	/// destination.
				307	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
				308	/// sums of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	309	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	310	_mm_hadds_pi16(__m64 __a, __m64 __b)
				311	{
				312	return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
				313	}
				314
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	315	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	316	/// packed 128-bit vectors of [8 x i16], subtracting the higher-indexed value of each pair from the lower-indexed one.
				317	///
				318	/// \headerfile <x86intrin.h>
				319	///
				320	/// This intrinsic corresponds to the \c VPHSUBW instruction.
				321	///
				322	/// \param __a
				323	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				324	/// horizontal differences between the values are stored in the lower bits of
				325	/// the destination.
				326	/// \param __b
				327	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				328	/// horizontal differences between the values are stored in the upper bits of
				329	/// the destination.
				330	/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
				331	/// of both operands.
				332	static __inline__ __m128i __DEFAULT_FN_ATTRS
				333	_mm_hsub_epi16(__m128i __a, __m128i __b)
				334	{
				335	return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
				336	}
				337
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	338	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	339	/// packed 128-bit vectors of [4 x i32], subtracting the higher-indexed value of each pair from the lower-indexed one.
				340	///
				341	/// \headerfile <x86intrin.h>
				342	///
				343	/// This intrinsic corresponds to the \c VPHSUBD instruction.
				344	///
				345	/// \param __a
				346	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				347	/// horizontal differences between the values are stored in the lower bits of
				348	/// the destination.
				349	/// \param __b
				350	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
				351	/// horizontal differences between the values are stored in the upper bits of
				352	/// the destination.
				353	/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
				354	/// of both operands.
				355	static __inline__ __m128i __DEFAULT_FN_ATTRS
				356	_mm_hsub_epi32(__m128i __a, __m128i __b)
				357	{
				358	return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
				359	}
				360
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	361	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	362	/// packed 64-bit vectors of [4 x i16], subtracting the higher-indexed value of each pair from the lower-indexed one.
				363	///
				364	/// \headerfile <x86intrin.h>
				365	///
				366	/// This intrinsic corresponds to the \c PHSUBW instruction.
				367	///
				368	/// \param __a
				369	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				370	/// horizontal differences between the values are stored in the lower bits of
				371	/// the destination.
				372	/// \param __b
				373	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				374	/// horizontal differences between the values are stored in the upper bits of
				375	/// the destination.
				376	/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
				377	/// of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	378	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	379	_mm_hsub_pi16(__m64 __a, __m64 __b)
				380	{
				381	return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
				382	}
				383
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	384	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	385	/// packed 64-bit vectors of [2 x i32], subtracting the higher-indexed value of each pair from the lower-indexed one.
				386	///
				387	/// \headerfile <x86intrin.h>
				388	///
				389	/// This intrinsic corresponds to the \c PHSUBD instruction.
				390	///
				391	/// \param __a
				392	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				393	/// horizontal differences between the values are stored in the lower bits of
				394	/// the destination.
				395	/// \param __b
				396	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
				397	/// horizontal differences between the values are stored in the upper bits of
				398	/// the destination.
				399	/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
				400	/// of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	401	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	402	_mm_hsub_pi32(__m64 __a, __m64 __b)
				403	{
				404	return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
				405	}
				406
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	407	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	408	/// packed 128-bit vectors of [8 x i16]. Differences greater than 32767
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	409	/// (0x7FFF) are saturated to 0x7FFF. Differences less than -32768 (0x8000) are
				410	/// saturated to 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	411	///
				412	/// \headerfile <x86intrin.h>
				413	///
				414	/// This intrinsic corresponds to the \c VPHSUBSW instruction.
				415	///
				416	/// \param __a
				417	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				418	/// horizontal differences between the values are stored in the lower bits of
				419	/// the destination.
				420	/// \param __b
				421	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
				422	/// horizontal differences between the values are stored in the upper bits of
				423	/// the destination.
				424	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
				425	/// differences of both operands.
				426	static __inline__ __m128i __DEFAULT_FN_ATTRS
				427	_mm_hsubs_epi16(__m128i __a, __m128i __b)
				428	{
				429	return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
				430	}
				431
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	432	/// Horizontally subtracts the adjacent pairs of values contained in 2
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	433	/// packed 64-bit vectors of [4 x i16]. Differences greater than 32767
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	434	/// (0x7FFF) are saturated to 0x7FFF. Differences less than -32768 (0x8000) are
				435	/// saturated to 0x8000.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	436	///
				437	/// \headerfile <x86intrin.h>
				438	///
				439	/// This intrinsic corresponds to the \c PHSUBSW instruction.
				440	///
				441	/// \param __a
				442	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				443	/// horizontal differences between the values are stored in the lower bits of
				444	/// the destination.
				445	/// \param __b
				446	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
				447	/// horizontal differences between the values are stored in the upper bits of
				448	/// the destination.
				449	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
				450	/// differences of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	451	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	452	_mm_hsubs_pi16(__m64 __a, __m64 __b)
				453	{
				454	return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
				455	}
				456
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	457	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	458	/// values contained in the first source operand and packed 8-bit signed
				459	/// integer values contained in the second source operand, adds pairs of
				460	/// contiguous products with signed saturation, and writes the 16-bit sums to
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	461	/// the corresponding bits in the destination.
				462	///
				463	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
				464	/// both operands are multiplied, and the sum of both results is written to
				465	/// bits [15:0] of the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	466	///
				467	/// \headerfile <x86intrin.h>
				468	///
				469	/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
				470	///
				471	/// \param __a
				472	/// A 128-bit integer vector containing the first source operand.
				473	/// \param __b
				474	/// A 128-bit integer vector containing the second source operand.
				475	/// \returns A 128-bit integer vector containing the sums of products of both
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	476	/// operands: \n
				477	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
				478	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
				479	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
				480	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
				481	/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
				482	/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
				483	/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
				484	/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	485	static __inline__ __m128i __DEFAULT_FN_ATTRS
				486	_mm_maddubs_epi16(__m128i __a, __m128i __b)
				487	{
				488	return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
				489	}
				490
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	491	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	492	/// values contained in the first source operand and packed 8-bit signed
				493	/// integer values contained in the second source operand, adds pairs of
				494	/// contiguous products with signed saturation, and writes the 16-bit sums to
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	495	/// the corresponding bits in the destination.
				496	///
				497	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
				498	/// both operands are multiplied, and the sum of both results is written to
				499	/// bits [15:0] of the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	500	///
				501	/// \headerfile <x86intrin.h>
				502	///
				503	/// This intrinsic corresponds to the \c PMADDUBSW instruction.
				504	///
				505	/// \param __a
				506	/// A 64-bit integer vector containing the first source operand.
				507	/// \param __b
				508	/// A 64-bit integer vector containing the second source operand.
				509	/// \returns A 64-bit integer vector containing the sums of products of both
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	510	/// operands: \n
				511	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
				512	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
				513	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
				514	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
				515	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	516	_mm_maddubs_pi16(__m64 __a, __m64 __b)
				517	{
				518	return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
				519	}
				520
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	521	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	522	/// products to the 18 most significant bits by right-shifting, rounds the
				523	/// truncated value by adding 1, and writes bits [16:1] to the destination.
				524	///
				525	/// \headerfile <x86intrin.h>
				526	///
				527	/// This intrinsic corresponds to the \c VPMULHRSW instruction.
				528	///
				529	/// \param __a
				530	/// A 128-bit vector of [8 x i16] containing one of the source operands.
				531	/// \param __b
				532	/// A 128-bit vector of [8 x i16] containing one of the source operands.
				533	/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
				534	/// products of both operands.
				535	static __inline__ __m128i __DEFAULT_FN_ATTRS
				536	_mm_mulhrs_epi16(__m128i __a, __m128i __b)
				537	{
				538	return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
				539	}
				540
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	541	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	542	/// products to the 18 most significant bits by right-shifting, rounds the
				543	/// truncated value by adding 1, and writes bits [16:1] to the destination.
				544	///
				545	/// \headerfile <x86intrin.h>
				546	///
				547	/// This intrinsic corresponds to the \c PMULHRSW instruction.
				548	///
				549	/// \param __a
				550	/// A 64-bit vector of [4 x i16] containing one of the source operands.
				551	/// \param __b
				552	/// A 64-bit vector of [4 x i16] containing one of the source operands.
				553	/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
				554	/// products of both operands.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	555	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	556	_mm_mulhrs_pi16(__m64 __a, __m64 __b)
				557	{
				558	return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
				559	}
				560
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	561	/// Copies the 8-bit integers from a 128-bit integer vector to the
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	562	/// destination or clears 8-bit values in the destination, as specified by
				563	/// the second source operand.
				564	///
				565	/// \headerfile <x86intrin.h>
				566	///
				567	/// This intrinsic corresponds to the \c VPSHUFB instruction.
				568	///
				569	/// \param __a
				570	/// A 128-bit integer vector containing the values to be copied.
				571	/// \param __b
				572	/// A 128-bit integer vector containing control bytes corresponding to
				573	/// positions in the destination:
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	574	/// Bit 7: \n
				575	/// 1: Clear the corresponding byte in the destination. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	576	/// 0: Copy the selected source byte to the corresponding byte in the
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	577	/// destination. \n
				578	/// Bits [6:4] Reserved. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	579	/// Bits [3:0] select the source byte to be copied.
				580	/// \returns A 128-bit integer vector containing the copied or cleared values.
				581	static __inline__ __m128i __DEFAULT_FN_ATTRS
				582	_mm_shuffle_epi8(__m128i __a, __m128i __b)
				583	{
				584	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
				585	}
				586
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	587	/// Shuffles the 8-bit integers of a 64-bit integer vector: each byte of
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	588	/// the destination is either copied from a selected source byte or cleared,
				589	/// as specified by the corresponding control byte of the second operand.
				590	///
				591	/// \headerfile <x86intrin.h>
				592	///
				593	/// This intrinsic corresponds to the \c PSHUFB instruction.
				594	///
				595	/// \param __a
				596	/// A 64-bit integer vector containing the source bytes to be copied.
				597	/// \param __b
				598	/// A 64-bit integer vector containing one control byte for each byte
				599	/// position in the destination:
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	600	/// Bit 7: \n
				601	/// 1: Clear the corresponding byte in the destination. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	602	/// 0: Copy the selected source byte to the corresponding byte in the
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	603	/// destination. \n
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	604	/// Bits [2:0]: Index (0-7) of the byte in \a __a to be copied.
				605	/// \returns A 64-bit integer vector containing the copied or cleared values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	606	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	607	_mm_shuffle_pi8(__m64 __a, __m64 __b)
				608	{
				609	return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
				610	}
				611
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	612	/// For each 8-bit integer in the first source operand, negates, copies, or
				613	/// clears it according to the sign of the byte in the second source operand.
				614	///
				615	/// If the byte in the second source is negative, calculate the two's
				616	/// complement of the corresponding byte in the first source, and write that
				617	/// value to the destination. If the byte in the second source is positive,
				618	/// copy the corresponding byte from the first source to the destination. If
				619	/// the byte in the second source is zero, clear the corresponding byte in
				620	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	621	///
				622	/// \headerfile <x86intrin.h>
				623	///
				624	/// This intrinsic corresponds to the \c VPSIGNB instruction.
				625	///
				626	/// \param __a
				627	/// A 128-bit integer vector of bytes to be negated, copied, or cleared.
				628	/// \param __b
				629	/// A 128-bit integer vector of control bytes; the sign of each byte selects
				630	/// the action for the same position in the destination.
				631	/// \returns A 128-bit integer vector containing the resultant values.
				632	static __inline__ __m128i __DEFAULT_FN_ATTRS
				633	_mm_sign_epi8(__m128i __a, __m128i __b)
				634	{
				635	return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
				636	}
				637
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	638	/// For each 16-bit integer in the first source operand, negates, copies, or
				639	/// clears it according to the sign of the word in the second source operand.
				640	///
				641	/// If the word in the second source is negative, calculate the two's
				642	/// complement of the corresponding word in the first source, and write that
				643	/// value to the destination. If the word in the second source is positive,
				644	/// copy the corresponding word from the first source to the destination. If
				645	/// the word in the second source is zero, clear the corresponding word in
				646	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	647	///
				648	/// \headerfile <x86intrin.h>
				649	///
				650	/// This intrinsic corresponds to the \c VPSIGNW instruction.
				651	///
				652	/// \param __a
				653	/// A 128-bit integer vector of words to be negated, copied, or cleared.
				654	/// \param __b
				655	/// A 128-bit integer vector of control words; the sign of each word selects
				656	/// the action for the same position in the destination.
				657	/// \returns A 128-bit integer vector containing the resultant values.
				658	static __inline__ __m128i __DEFAULT_FN_ATTRS
				659	_mm_sign_epi16(__m128i __a, __m128i __b)
				660	{
				661	return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
				662	}
				663
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	664	/// For each 32-bit integer in the first source operand, negates, copies, or
				665	/// clears it according to the sign of the doubleword in the second source.
				666	///
				667	/// If the doubleword in the second source is negative, calculate the two's
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	668	/// complement of the corresponding doubleword in the first source, and
				669	/// write that value to the destination. If the doubleword in the second
				670	/// source is positive, copy the corresponding doubleword from the first
				671	/// source to the destination. If the doubleword in the second source is
				672	/// zero, clear the corresponding doubleword in the destination.
				673	///
				674	/// \headerfile <x86intrin.h>
				675	///
				676	/// This intrinsic corresponds to the \c VPSIGND instruction.
				677	///
				678	/// \param __a
				679	/// A 128-bit integer vector of doublewords to be negated, copied, or cleared.
				680	/// \param __b
				681	/// A 128-bit integer vector of control doublewords; the sign of each one
				682	/// selects the action for the same position in the destination.
				683	/// \returns A 128-bit integer vector containing the resultant values.
				684	static __inline__ __m128i __DEFAULT_FN_ATTRS
				685	_mm_sign_epi32(__m128i __a, __m128i __b)
				686	{
				687	return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
				688	}
				689
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	690	/// For each 8-bit integer in the first source operand, negates, copies, or
				691	/// clears it according to the sign of the byte in the second source operand.
				692	///
				693	/// If the byte in the second source is negative, calculate the two's
				694	/// complement of the corresponding byte in the first source, and write that
				695	/// value to the destination. If the byte in the second source is positive,
				696	/// copy the corresponding byte from the first source to the destination. If
				697	/// the byte in the second source is zero, clear the corresponding byte in
				698	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	699	///
				700	/// \headerfile <x86intrin.h>
				701	///
				702	/// This intrinsic corresponds to the \c PSIGNB instruction.
				703	///
				704	/// \param __a
				705	/// A 64-bit integer vector of bytes to be negated, copied, or cleared.
				706	/// \param __b
				707	/// A 64-bit integer vector of control bytes; the sign of each byte selects
				708	/// the action for the same position in the destination.
				709	/// \returns A 64-bit integer vector containing the resultant values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	710	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	711	_mm_sign_pi8(__m64 __a, __m64 __b)
				712	{
				713	return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
				714	}
				715
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	716	/// For each 16-bit integer in the first source operand, negates, copies, or
				717	/// clears it according to the sign of the word in the second source operand.
				718	///
				719	/// If the word in the second source is negative, calculate the two's
				720	/// complement of the corresponding word in the first source, and write that
				721	/// value to the destination. If the word in the second source is positive,
				722	/// copy the corresponding word from the first source to the destination. If
				723	/// the word in the second source is zero, clear the corresponding word in
				724	/// the destination.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	725	///
				726	/// \headerfile <x86intrin.h>
				727	///
				728	/// This intrinsic corresponds to the \c PSIGNW instruction.
				729	///
				730	/// \param __a
				731	/// A 64-bit integer vector of words to be negated, copied, or cleared.
				732	/// \param __b
				733	/// A 64-bit integer vector of control words; the sign of each word selects
				734	/// the action for the same position in the destination.
				735	/// \returns A 64-bit integer vector containing the resultant values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	736	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	737	_mm_sign_pi16(__m64 __a, __m64 __b)
				738	{
				739	return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
				740	}
				741
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	742	/// For each 32-bit integer in the first source operand, negates, copies, or
				743	/// clears it according to the sign of the doubleword in the second source.
				744	///
				745	/// If the doubleword in the second source is negative, calculate the two's
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	746	/// complement of the corresponding doubleword in the first source, and
				747	/// write that value to the destination. If the doubleword in the second
				748	/// source is positive, copy the corresponding doubleword from the first
				749	/// source to the destination. If the doubleword in the second source is
				750	/// zero, clear the corresponding doubleword in the destination.
				751	///
				752	/// \headerfile <x86intrin.h>
				753	///
				754	/// This intrinsic corresponds to the \c PSIGND instruction.
				755	///
				756	/// \param __a
				757	/// A 64-bit integer vector of doublewords to be negated, copied, or cleared.
				758	/// \param __b
				759	/// A 64-bit integer vector of two control doublewords; the sign of each one
				760	/// selects the action for the same position in the destination.
				761	/// \returns A 64-bit integer vector containing the resultant values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	762	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	763	_mm_sign_pi32(__m64 __a, __m64 __b)
				764	{
				765	return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
				766	}
				767
				768	#undef __DEFAULT_FN_ATTRS
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	769	#undef __DEFAULT_FN_ATTRS_MMX
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	770
				771	#endif /* __TMMINTRIN_H */