/* ===-------- ia32intrin.h ---------------------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __X86INTRIN_H
11#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
12#endif
13
14#ifndef __IA32INTRIN_H
15#define __IA32INTRIN_H
16
/** Returns the index of the least significant set bit of a 32-bit value.
 *  The result is undefined if the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSF </c> instruction or the
 * <c> TZCNT </c> instruction.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A) {
  /* __builtin_ctz takes an unsigned int; make the conversion explicit. */
  return __builtin_ctz((unsigned int)__A);
}
33
/** Returns the index of the most significant set bit of a 32-bit value.
 *  The result is undefined if the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSR </c> instruction or the
 * <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A) {
  /* Leading-zero count converted to a bit index counted from bit 0. */
  return 31 - __builtin_clz((unsigned int)__A);
}
50
/** Reverses the byte order of a 32-bit value, converting little endian to
 *  big endian or vice versa.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A) {
  /* The builtin operates on unsigned 32-bit values; convert explicitly. */
  return (int)__builtin_bswap32((unsigned int)__A);
}
66
/** Reverses the byte order of a 32-bit value. Performs the same operation
 *  as the <c> __builtin_bswap32 </c> builtin it wraps.
 *
 * \headerfile <x86intrin.h>
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A) {
  /* The builtin operates on unsigned 32-bit values; convert explicitly. */
  return (int)__builtin_bswap32((unsigned int)__A);
}
71
/* Alternate spellings that expand to the 32-bit bit-scan intrinsics
   __bsfd and __bsrd. */
#define _bit_scan_forward(A) __bsfd((A))
#define _bit_scan_reverse(A) __bsrd((A))
74
#ifdef __x86_64__
/** Returns the index of the least significant set bit of a 64-bit value.
 *  The result is undefined if the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSF </c> instruction or the
 * <c> TZCNT </c> instruction.
 *
 * \param __A
 *    A 64-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A) {
  /* __builtin_ctzll takes an unsigned long long; convert explicitly. */
  return __builtin_ctzll((unsigned long long)__A);
}

/** Returns the index of the most significant set bit of a 64-bit value.
 *  The result is undefined if the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSR </c> instruction or the
 * <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 * \param __A
 *    A 64-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A) {
  /* Leading-zero count converted to a bit index counted from bit 0. */
  return 63 - __builtin_clzll((unsigned long long)__A);
}

/** Reverses the byte order of a 64-bit value, converting little endian to
 *  big endian or vice versa.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 * \param __A
 *    A 64-bit integer operand.
 * \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A) {
  /* The builtin operates on unsigned 64-bit values; convert explicitly. */
  return (long long)__builtin_bswap64((unsigned long long)__A);
}

/* Alternate spelling of __bswapq. */
#define _bswap64(A) __bswapq((A))
#endif
128
/** Counts the number of bits in the source operand having a value of 1.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 * sequence of arithmetic and logic ops to calculate it.
 *
 * \param __A
 *    An unsigned 32-bit integer operand.
 * \returns A 32-bit integer containing the number of bits with value 1 in the
 *    source operand.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A)
{
  return __builtin_popcount(__A);
}
146
/* Alternate spelling of __popcntd. */
#define _popcnt32(A) __popcntd((A))
148
#ifdef __x86_64__
/** Counts the number of bits in the source operand having a value of 1.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 * sequence of arithmetic and logic ops to calculate it.
 *
 * \param __A
 *    An unsigned 64-bit integer operand.
 * \returns A 64-bit integer containing the number of bits with value 1 in the
 *    source operand.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A)
{
  return __builtin_popcountll(__A);
}

/* Alternate spelling of __popcntq. */
#define _popcnt64(A) __popcntq((A))
#endif /* __x86_64__ */
170
#ifdef __x86_64__
/** Reads the processor flags register as a 64-bit value.
 *
 * \headerfile <x86intrin.h>
 *
 * \returns The value produced by the <c> __builtin_ia32_readeflags_u64 </c>
 *    builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  return __builtin_ia32_readeflags_u64();
}

/** Writes a 64-bit value to the processor flags register.
 *
 * \headerfile <x86intrin.h>
 *
 * \param __f
 *    The value passed to the <c> __builtin_ia32_writeeflags_u64 </c> builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f)
{
  __builtin_ia32_writeeflags_u64(__f);
}

#else /* !__x86_64__ */
/** Reads the processor flags register as a 32-bit value.
 *
 * \headerfile <x86intrin.h>
 *
 * \returns The value produced by the <c> __builtin_ia32_readeflags_u32 </c>
 *    builtin.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  return __builtin_ia32_readeflags_u32();
}

/** Writes a 32-bit value to the processor flags register.
 *
 * \headerfile <x86intrin.h>
 *
 * \param __f
 *    The value passed to the <c> __builtin_ia32_writeeflags_u32 </c> builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f)
{
  __builtin_ia32_writeeflags_u32(__f);
}
#endif /* !__x86_64__ */
197
/** Cast a 32-bit float value to a 32-bit unsigned integer value. The bits
 *  are copied unchanged; no numeric conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 * \param __A
 *    A 32-bit float value.
 * \returns a 32-bit unsigned integer containing the converted value.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* The local uses a reserved identifier so that a user-defined macro
     cannot collide with it when this header is included. */
  unsigned int __R;
  __builtin_memcpy(&__R, &__A, sizeof(__R));
  return __R;
}
214
/** Cast a 64-bit float value to a 64-bit unsigned integer value. The bits
 *  are copied unchanged; no numeric conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 * \param __A
 *    A 64-bit float value.
 * \returns a 64-bit unsigned integer containing the converted value.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* The local uses a reserved identifier so that a user-defined macro
     cannot collide with it when this header is included. */
  unsigned long long __R;
  __builtin_memcpy(&__R, &__A, sizeof(__R));
  return __R;
}
231
/** Cast a 32-bit unsigned integer value to a 32-bit float value. The bits
 *  are copied unchanged; no numeric conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 * and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 * \param __A
 *    A 32-bit unsigned integer value.
 * \returns a 32-bit float value containing the converted value.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* The local uses a reserved identifier so that a user-defined macro
     cannot collide with it when this header is included. */
  float __R;
  __builtin_memcpy(&__R, &__A, sizeof(__R));
  return __R;
}
248
/** Cast a 64-bit unsigned integer value to a 64-bit float value. The bits
 *  are copied unchanged; no numeric conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 * and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 * \param __A
 *    A 64-bit unsigned integer value.
 * \returns a 64-bit float value containing the converted value.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* The local uses a reserved identifier so that a user-defined macro
     cannot collide with it when this header is included. */
  double __R;
  __builtin_memcpy(&__R, &__A, sizeof(__R));
  return __R;
}
265
/** Updates a running CRC-32C checksum with one unsigned 8-bit value.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand holding the CRC-32C value accumulated so
 *    far, to be combined with operand \a __D.
 * \param __D
 *    An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of folding operand \a __D into the CRC-32C value
 *    \a __C.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D)
{
  return __builtin_ia32_crc32qi(__C, __D);
}
286
/** Updates a running CRC-32C checksum with one unsigned 16-bit value.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand holding the CRC-32C value accumulated so
 *    far, to be combined with operand \a __D.
 * \param __D
 *    An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of folding operand \a __D into the CRC-32C value
 *    \a __C.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D)
{
  return __builtin_ia32_crc32hi(__C, __D);
}
307
/** Updates a running CRC-32C checksum with one unsigned 32-bit value.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand holding the CRC-32C value accumulated so
 *    far, to be combined with operand \a __D.
 * \param __D
 *    An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of folding operand \a __D into the CRC-32C value
 *    \a __C.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D)
{
  return __builtin_ia32_crc32si(__C, __D);
}
328
#ifdef __x86_64__
/** Updates a running CRC-32C checksum with one unsigned 64-bit value.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand holding the CRC-32C value accumulated so
 *    far, to be combined with operand \a __D.
 * \param __D
 *    An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of folding operand \a __D into the CRC-32C value
 *    \a __C.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D)
{
  return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */
351
/** Reads a performance-monitoring counter through the
 *  <c> __builtin_ia32_rdpmc </c> builtin.
 *
 * \headerfile <x86intrin.h>
 *
 * \param __A
 *    The counter selector passed to the builtin.
 * \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A) {
  return __builtin_ia32_rdpmc(__A);
}
356
/** Reads the time-stamp counter through the <c> __builtin_ia32_rdtscp </c>
 *  builtin.
 *
 * \headerfile <x86intrin.h>
 *
 * \param __A
 *    Pointer to an unsigned int handed to the builtin, which stores the
 *    auxiliary 32-bit value produced by the instruction through it.
 * \returns The 64-bit counter value returned by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A) {
  return __builtin_ia32_rdtscp(__A);
}
362
/* Alternate spellings. __rdtsc is not defined in this header and must be
   provided elsewhere; _rdpmc forwards to __rdpmc. */
#define _rdtsc() __rdtsc()

#define _rdpmc(A) __rdpmc(A)
366
/** Invokes the <c> __builtin_ia32_wbinvd </c> builtin, which corresponds to
 *  the <c> WBINVD </c> instruction. Takes no arguments and returns nothing.
 *
 * \headerfile <x86intrin.h>
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void) {
  __builtin_ia32_wbinvd();
}
371
372static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
373__rolb(unsigned char __X, int __C) {
374 return __builtin_rotateleft8(__X, __C);
375}
376
377static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
378__rorb(unsigned char __X, int __C) {
379 return __builtin_rotateright8(__X, __C);
380}
381
382static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
383__rolw(unsigned short __X, int __C) {
384 return __builtin_rotateleft16(__X, __C);
385}
386
387static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
388__rorw(unsigned short __X, int __C) {
389 return __builtin_rotateright16(__X, __C);
390}
391
392static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
393__rold(unsigned int __X, int __C) {
394 return __builtin_rotateleft32(__X, __C);
395}
396
397static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
398__rord(unsigned int __X, int __C) {
399 return __builtin_rotateright32(__X, __C);
400}
401
402#ifdef __x86_64__
403static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
404__rolq(unsigned long long __X, int __C) {
405 return __builtin_rotateleft64(__X, __C);
406}
407
408static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
409__rorq(unsigned long long __X, int __C) {
410 return __builtin_rotateright64(__X, __C);
411}
412#endif /* __x86_64__ */
413
#ifndef _MSC_VER
/* These are already provided as builtins for MSVC. */
/* Select the correct function based on the size of long: the 64-bit rotates
   when __LP64__ is defined, the 32-bit rotates otherwise. */
#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif
/* _rotl and _rotr always use the 32-bit rotates. */
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
#endif /* _MSC_VER */
427
/* These are not builtins so need to be provided in all modes. They forward
   to the 16-bit rotates __rolw and __rorw. */
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))
431
432#endif /* __IA32INTRIN_H */