/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10#ifndef __AMMINTRIN_H
11#define __AMMINTRIN_H
12
13#include <pmmintrin.h>
14
15/* Define the default attributes for the functions in this file. */
Logan Chien55afb0a2018-10-15 10:42:14 +080016#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))
Logan Chien2833ffb2018-10-09 10:03:24 +080017
/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

Logan Chien55afb0a2018-10-15 10:42:14 +080046/// Extracts the specified bits from the lower 64 bits of the 128-bit
47/// integer vector operand at the index and of the length specified by
48/// \a __y.
Logan Chien2833ffb2018-10-09 10:03:24 +080049///
50/// \headerfile <x86intrin.h>
51///
Logan Chien55afb0a2018-10-15 10:42:14 +080052/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
Logan Chien2833ffb2018-10-09 10:03:24 +080053///
54/// \param __x
55/// The value from which bits are extracted.
56/// \param __y
57/// Specifies the index of the least significant bit at [13:8] and the
58/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
59/// length is interpreted as 64. If the sum of the index and length is
60/// greater than 64, the result is undefined. If the length and index are
Logan Chien55afb0a2018-10-15 10:42:14 +080061/// both zero, bits [63:0] of parameter \a __x are extracted. If the length
62/// is zero but the index is non-zero, the result is undefined.
Logan Chien2833ffb2018-10-09 10:03:24 +080063/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
64/// from the source operand.
65static __inline__ __m128i __DEFAULT_FN_ATTRS
66_mm_extract_si64(__m128i __x, __m128i __y)
67{
68 return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
69}
70
/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

Logan Chien55afb0a2018-10-15 10:42:14 +0800109/// Inserts bits of a specified length from the source integer vector
110/// \a __y into the lower 64 bits of the destination integer vector \a __x
111/// at the index and of the length specified by \a __y.
Logan Chien2833ffb2018-10-09 10:03:24 +0800112///
113/// \headerfile <x86intrin.h>
114///
Logan Chien55afb0a2018-10-15 10:42:14 +0800115/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
Logan Chien2833ffb2018-10-09 10:03:24 +0800116///
117/// \param __x
118/// The destination operand where bits will be inserted. The inserted bits
119/// are defined by the length and by the index of the least significant bit
Logan Chien55afb0a2018-10-15 10:42:14 +0800120/// specified by operand \a __y.
Logan Chien2833ffb2018-10-09 10:03:24 +0800121/// \param __y
122/// The source operand containing the bits to be extracted. The extracted
Logan Chien55afb0a2018-10-15 10:42:14 +0800123/// bits are the least significant bits of operand \a __y with length
124/// specified by bits [69:64]. These are inserted into the destination at the
125/// index specified by bits [77:72]; all other bits are ignored. If bits
126/// [69:64] are zero, the length is interpreted as 64. If the sum of the
127/// index and length is greater than 64, the result is undefined. If the
128/// length and index are both zero, bits [63:0] of parameter \a __y are
129/// inserted into parameter \a __x. If the length is zero but the index is
130/// non-zero, the result is undefined.
Logan Chien2833ffb2018-10-09 10:03:24 +0800131/// \returns A 128-bit integer vector containing the original lower 64-bits of
Logan Chien55afb0a2018-10-15 10:42:14 +0800132/// destination operand \a __x with the specified bitfields replaced by the
133/// lower bits of source operand \a __y. The upper 64 bits of the return
134/// value are undefined.
Logan Chien2833ffb2018-10-09 10:03:24 +0800135static __inline__ __m128i __DEFAULT_FN_ATTRS
136_mm_insert_si64(__m128i __x, __m128i __y)
137{
138 return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
139}
140
Logan Chien55afb0a2018-10-15 10:42:14 +0800141/// Stores a 64-bit double-precision value in a 64-bit memory location.
Logan Chien2833ffb2018-10-09 10:03:24 +0800142/// To minimize caching, the data is flagged as non-temporal (unlikely to be
143/// used again soon).
144///
145/// \headerfile <x86intrin.h>
146///
Logan Chien55afb0a2018-10-15 10:42:14 +0800147/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
Logan Chien2833ffb2018-10-09 10:03:24 +0800148///
149/// \param __p
150/// The 64-bit memory location used to store the register value.
151/// \param __a
152/// The 64-bit double-precision floating-point register value to be stored.
153static __inline__ void __DEFAULT_FN_ATTRS
154_mm_stream_sd(double *__p, __m128d __a)
155{
156 __builtin_ia32_movntsd(__p, (__v2df)__a);
157}
158
Logan Chien55afb0a2018-10-15 10:42:14 +0800159/// Stores a 32-bit single-precision floating-point value in a 32-bit
Logan Chien2833ffb2018-10-09 10:03:24 +0800160/// memory location. To minimize caching, the data is flagged as
161/// non-temporal (unlikely to be used again soon).
162///
163/// \headerfile <x86intrin.h>
164///
Logan Chien55afb0a2018-10-15 10:42:14 +0800165/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
Logan Chien2833ffb2018-10-09 10:03:24 +0800166///
167/// \param __p
168/// The 32-bit memory location used to store the register value.
169/// \param __a
170/// The 32-bit single-precision floating-point register value to be stored.
171static __inline__ void __DEFAULT_FN_ATTRS
172_mm_stream_ss(float *__p, __m128 __a)
173{
174 __builtin_ia32_movntss(__p, (__v4sf)__a);
175}
176
177#undef __DEFAULT_FN_ATTRS
178
179#endif /* __AMMINTRIN_H */