Blame - linux-x86/lib64/clang/14.0.2/include/f16cintrin.h - platform/prebuilts/clang-tools

blob: 13905e6fb0ec8bc53d079aaf80debc14b893e3ed [file] [log] [blame]

Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	1	/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
				2	*
Logan Chien	df4f766	2019-09-04 16:45:23 -0700	[diff] [blame]	3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	* See https://llvm.org/LICENSE.txt for license information.
				5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	6	*
				7	*===-----------------------------------------------------------------------===
				8	*/
				9
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	10	#if !defined __IMMINTRIN_H
				11	#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	12	#endif
				13
				14	#ifndef __F16CINTRIN_H
				15	#define __F16CINTRIN_H
				16
				17	/* Define the default attributes for the functions in this file. */
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	18	#define __DEFAULT_FN_ATTRS128 \
				19	__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
				20	#define __DEFAULT_FN_ATTRS256 \
				21	__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	22
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	23	/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
				24	* but that's because icc can emulate these without f16c using a library call.
				25	* Since we don't do that let's leave these in f16cintrin.h.
				26	*/
				27
				28	/// Converts a 16-bit half-precision float value into a 32-bit float
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	29	/// value.
				30	///
				31	/// \headerfile <x86intrin.h>
				32	///
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	33	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	34	///
				35	/// \param __a
				36	/// A 16-bit half-precision float value.
				37	/// \returns The converted 32-bit float value.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	38	static __inline float __DEFAULT_FN_ATTRS128
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	39	_cvtsh_ss(unsigned short __a)
				40	{
Logan Chien	df4f766	2019-09-04 16:45:23 -0700	[diff] [blame]	41	__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
				42	__v4sf __r = __builtin_ia32_vcvtph2ps(__v);
				43	return __r[0];
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	44	}
				45
/// Narrows a single 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <immintrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value. It is placed in element 0 of a
///    [4 x float] vector whose remaining elements are zero.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value, taken from
///    element 0 of the conversion result.
#define _cvtss_sh(a, imm) \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph( \
      (__v4sf){(a), 0, 0, 0}, (imm)))[0]))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	71
/// Narrows the 32-bit float values of a 128-bit vector to 16-bit
///    half-precision float values, returned in a 128-bit vector.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector whose lower 64 bits hold the converted 16-bit
///    half-precision floating-point values.
#define _mm_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph( \
      (__v4sf)(__m128)(a), \
      (imm)))
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	97
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	98	/// Converts a 128-bit vector containing 16-bit half-precision float
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	99	/// values into a 128-bit vector containing 32-bit float values.
				100	///
				101	/// \headerfile <x86intrin.h>
				102	///
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	103	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	104	///
				105	/// \param __a
				106	/// A 128-bit vector containing 16-bit half-precision float values. The lower
				107	/// 64 bits are used in the conversion.
				108	/// \returns A 128-bit vector of [4 x float] containing converted float values.
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	109	static __inline __m128 __DEFAULT_FN_ATTRS128
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	110	_mm_cvtph_ps(__m128i __a)
				111	{
				112	return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
				113	}
				114
/// Narrows a 256-bit vector of [8 x float] to 16-bit half-precision float
///    values, returned in a 128-bit vector.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256( \
      (__v8sf)(__m256)(a), \
      (imm)))
Logan Chien	55afb0a	2018-10-15 10:42:14 +0800	[diff] [blame]	140
				141	/// Converts a 128-bit vector containing 16-bit half-precision float
				142	/// values into a 256-bit vector of [8 x float].
				143	///
				144	/// \headerfile <x86intrin.h>
				145	///
				146	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
				147	///
				148	/// \param __a
				149	/// A 128-bit vector containing 16-bit half-precision float values to be
				150	/// converted to 32-bit single-precision float values.
				151	/// \returns A vector of [8 x float] containing the converted 32-bit
				152	/// single-precision float values.
				153	static __inline __m256 __DEFAULT_FN_ATTRS256
				154	_mm256_cvtph_ps(__m128i __a)
				155	{
				156	return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
				157	}
				158
				159	#undef __DEFAULT_FN_ATTRS128
				160	#undef __DEFAULT_FN_ATTRS256
Logan Chien	2833ffb	2018-10-09 10:03:24 +0800	[diff] [blame]	161
				162	#endif /* __F16CINTRIN_H */