Blame - clang/lib/Headers/f16cintrin.h - toolchain/llvm-project

blob: 1516946ec324ff2e68f13d2997387e14bc43ce11 [file] [log] [blame]

Craig Topper	61f71c3	2013-09-27 03:57:18 +0000	[diff] [blame]	1	/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	2	*
Chandler Carruth	4cf5743	2019-04-08 20:51:30 +0000	[diff] [blame^]	3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	* See https://llvm.org/LICENSE.txt for license information.
				5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	6	*
				7	*===-----------------------------------------------------------------------===
				8	*/
				9
Craig Topper	34c8c0d	2018-05-22 18:54:19 +0000	[diff] [blame]	10	#if !defined __IMMINTRIN_H
				11	#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	12	#endif
				13
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	14	#ifndef __F16CINTRIN_H
				15	#define __F16CINTRIN_H
				16
/* Default attribute sets applied to every function defined in this file.
 * Both carry __always_inline__, __nodebug__ and __target__("f16c"); they
 * differ only in the __min_vector_width__ value (128 or 256).
 */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), \
                 __min_vector_width__(256)))
Eric Christopher	4d185168	2015-06-17 07:09:20 +0000	[diff] [blame]	22
Craig Topper	73d1d40	2018-05-30 22:33:21 +0000	[diff] [blame]	23	/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
				24	* but that's because icc can emulate these without f16c using a library call.
				25	* Since we don't do that let's leave these in f16cintrin.h.
				26	*/
Craig Topper	25caca7	2018-05-22 22:19:19 +0000	[diff] [blame]	27
				28	/// Converts a 16-bit half-precision float value into a 32-bit float
				29	/// value.
				30	///
				31	/// \headerfile <x86intrin.h>
				32	///
				33	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
				34	///
				35	/// \param __a
				36	/// A 16-bit half-precision float value.
				37	/// \returns The converted 32-bit float value.
Craig Topper	74c10e3	2018-07-09 19:00:16 +0000	[diff] [blame]	38	static __inline float __DEFAULT_FN_ATTRS128
Craig Topper	25caca7	2018-05-22 22:19:19 +0000	[diff] [blame]	39	_cvtsh_ss(unsigned short __a)
				40	{
				41	__v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
				42	__v4sf r = __builtin_ia32_vcvtph2ps(v);
				43	return r[0];
				44	}
				45
				46	/// Converts a 32-bit single-precision float value to a 16-bit
				47	/// half-precision float value.
				48	///
				49	/// \headerfile <x86intrin.h>
				50	///
				51	/// \code
				52	/// unsigned short _cvtss_sh(float a, const int imm);
				53	/// \endcode
				54	///
				55	/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
				56	///
				57	/// \param a
				58	/// A 32-bit single-precision float value to be converted to a 16-bit
				59	/// half-precision float value.
				60	/// \param imm
				61	/// An immediate value controlling rounding using bits [2:0]: \n
				62	/// 000: Nearest \n
				63	/// 001: Down \n
				64	/// 010: Up \n
				65	/// 011: Truncate \n
				66	/// 1XX: Use MXCSR.RC for rounding
				67	/// \returns The converted 16-bit half-precision float value.
Craig Topper	c633867	2018-05-31 00:51:20 +0000	[diff] [blame]	68	#define _cvtss_sh(a, imm) \
Craig Topper	25caca7	2018-05-22 22:19:19 +0000	[diff] [blame]	69	(unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
Martin Storsjo	cad7a5f	2018-06-01 09:40:50 +0000	[diff] [blame]	70	(imm)))[0])
Craig Topper	25caca7	2018-05-22 22:19:19 +0000	[diff] [blame]	71
				72	/// Converts a 128-bit vector containing 32-bit float values into a
				73	/// 128-bit vector containing 16-bit half-precision float values.
				74	///
				75	/// \headerfile <x86intrin.h>
				76	///
				77	/// \code
				78	/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
				79	/// \endcode
				80	///
				81	/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
				82	///
				83	/// \param a
				84	/// A 128-bit vector containing 32-bit float values.
				85	/// \param imm
				86	/// An immediate value controlling rounding using bits [2:0]: \n
				87	/// 000: Nearest \n
				88	/// 001: Down \n
				89	/// 010: Up \n
				90	/// 011: Truncate \n
				91	/// 1XX: Use MXCSR.RC for rounding
				92	/// \returns A 128-bit vector containing converted 16-bit half-precision float
				93	/// values. The lower 64 bits are used to store the converted 16-bit
				94	/// half-precision floating-point values.
Craig Topper	c633867	2018-05-31 00:51:20 +0000	[diff] [blame]	95	#define _mm_cvtps_ph(a, imm) \
Martin Storsjo	cad7a5f	2018-06-01 09:40:50 +0000	[diff] [blame]	96	(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))
Craig Topper	25caca7	2018-05-22 22:19:19 +0000	[diff] [blame]	97
				98	/// Converts a 128-bit vector containing 16-bit half-precision float
				99	/// values into a 128-bit vector containing 32-bit float values.
				100	///
				101	/// \headerfile <x86intrin.h>
				102	///
				103	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
				104	///
				105	/// \param __a
				106	/// A 128-bit vector containing 16-bit half-precision float values. The lower
				107	/// 64 bits are used in the conversion.
				108	/// \returns A 128-bit vector of [4 x float] containing converted float values.
Craig Topper	74c10e3	2018-07-09 19:00:16 +0000	[diff] [blame]	109	static __inline __m128 __DEFAULT_FN_ATTRS128
Craig Topper	25caca7	2018-05-22 22:19:19 +0000	[diff] [blame]	110	_mm_cvtph_ps(__m128i __a)
				111	{
				112	return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
				113	}
Ekaterina Romanova	08d1f24	2016-01-22 06:50:50 +0000	[diff] [blame]	114
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* The expansion is wrapped in parentheses: without them a postfix operator
 * applied to the macro (for example a subscript) would bind to the builtin
 * call before the (__m128i) cast is applied. */
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	140
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	141	/// Converts a 128-bit vector containing 16-bit half-precision float
Craig Topper	34c8c0d	2018-05-22 18:54:19 +0000	[diff] [blame]	142	/// values into a 256-bit vector of [8 x float].
Ekaterina Romanova	a61946d	2016-02-10 00:12:24 +0000	[diff] [blame]	143	///
				144	/// \headerfile <x86intrin.h>
				145	///
Ekaterina Romanova	0c1c3bb	2016-12-09 18:35:50 +0000	[diff] [blame]	146	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
Ekaterina Romanova	a61946d	2016-02-10 00:12:24 +0000	[diff] [blame]	147	///
Ekaterina Romanova	1168fdc	2016-05-16 22:54:45 +0000	[diff] [blame]	148	/// \param __a
Craig Topper	34c8c0d	2018-05-22 18:54:19 +0000	[diff] [blame]	149	/// A 128-bit vector containing 16-bit half-precision float values to be
				150	/// converted to 32-bit single-precision float values.
				151	/// \returns A vector of [8 x float] containing the converted 32-bit
				152	/// single-precision float values.
Craig Topper	74c10e3	2018-07-09 19:00:16 +0000	[diff] [blame]	153	static __inline __m256 __DEFAULT_FN_ATTRS256
Craig Topper	34c8c0d	2018-05-22 18:54:19 +0000	[diff] [blame]	154	_mm256_cvtph_ps(__m128i __a)
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	155	{
Craig Topper	34c8c0d	2018-05-22 18:54:19 +0000	[diff] [blame]	156	return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	157	}
				158
Craig Topper	74c10e3	2018-07-09 19:00:16 +0000	[diff] [blame]	159	#undef __DEFAULT_FN_ATTRS128
				160	#undef __DEFAULT_FN_ATTRS256
Eric Christopher	4d185168	2015-06-17 07:09:20 +0000	[diff] [blame]	161
Manman Ren	a45358c	2012-10-11 00:59:55 +0000	[diff] [blame]	162	#endif /* __F16CINTRIN_H */