/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H

/*-*******************************************************
*  Compiler specifics
*********************************************************/
/* force inlining */

/*
 * INLINE_KEYWORD    : expands to `inline` when the translation mode provides
 *                     it (GNU C outside strict-ANSI mode, C++, or C99 and
 *                     later); expands to nothing otherwise.
 * FORCE_INLINE_ATTR : compiler-specific "always inline" request, available on
 *                     compilers defining __GNUC__ or __ICCARM__ (attribute
 *                     syntax) and on MSVC (__forceinline).
 *
 * Defining ZSTD_NO_INLINE makes both macros expand to nothing.
 */
#if !defined(ZSTD_NO_INLINE)

#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) \
    || defined(__cplusplus) \
    || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)   /* C99 */
#  define INLINE_KEYWORD inline
#else
#  define INLINE_KEYWORD
#endif

#if defined(__GNUC__) || defined(__ICCARM__)
#  define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define FORCE_INLINE_ATTR __forceinline
#else
#  define FORCE_INLINE_ATTR
#endif

#else   /* ZSTD_NO_INLINE */

#define INLINE_KEYWORD
#define FORCE_INLINE_ATTR

#endif  /* ZSTD_NO_INLINE */

/**
  On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
  This explicitly marks such functions as __cdecl so that the code will still compile
  if a CC other than __cdecl has been made the default.
  On every other compiler WIN_CDECL expands to nothing.
*/
#if defined(_MSC_VER)
#  define WIN_CDECL __cdecl
#else
#  define WIN_CDECL
#endif

/**
 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
 * parameters. They must be inlined for the compiler to eliminate the constant
 * branches.
 *
 * Expands to `static` followed by INLINE_KEYWORD and FORCE_INLINE_ATTR, so it
 * is meant to prefix a function definition.
 */
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
/**
 * HINT_INLINE is used to help the compiler generate better code. It is *not*
 * used for "templates", so it can be tweaked based on the compiler's
 * performance.
 *
 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
 * always_inline attribute.
 *
 * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
 * attribute.
 */
#if !defined(__clang__) && defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)
   /* real gcc 4.8 / 4.9 only: plain inline hint, no forced inlining */
#  define HINT_INLINE static INLINE_KEYWORD
#else
#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif

/* UNUSED_ATTR tells the compiler it is okay if the function is unused.
 * It expands to nothing on compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#  define UNUSED_ATTR __attribute__((unused))
#else
#  define UNUSED_ATTR
#endif

/* force no inlining
 * FORCE_NOINLINE always includes `static`; the "never inline" request is added
 * on MSVC and on compilers defining __GNUC__ or __ICCARM__. */
#if defined(_MSC_VER)
#  define FORCE_NOINLINE static __declspec(noinline)
#elif defined(__GNUC__) || defined(__ICCARM__)
#  define FORCE_NOINLINE static __attribute__((__noinline__))
#else
#  define FORCE_NOINLINE static
#endif

/* target attribute
 * TARGET_ATTRIBUTE(target) marks a function as compiled for the given target
 * string on compilers defining __GNUC__ or __ICCARM__, and expands to nothing
 * elsewhere. */
#ifndef __has_attribute
  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
#  define TARGET_ATTRIBUTE(target)
#endif

/* Enable runtime BMI2 dispatch based on the CPU.
 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
 *
 * The value can be forced by defining DYNAMIC_BMI2 before this point.
 * The 64-bit x86 target is detected through __x86_64__ (GNU-style compilers)
 * or _M_X64 (MSVC-compatible front ends).
 */
#ifndef DYNAMIC_BMI2
#  if ((defined(__clang__) && __has_attribute(__target__)) \
       || (defined(__GNUC__) \
           && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
      && (defined(__x86_64__) || defined(_M_X64)) \
      && !defined(__BMI2__)
#    define DYNAMIC_BMI2 1
#  else
#    define DYNAMIC_BMI2 0
#  endif
#endif

/* prefetch
 * can be disabled, by declaring NO_PREFETCH build macro
 *
 * PREFETCH_L1(ptr) / PREFETCH_L2(ptr) issue a read-prefetch hint for the
 * address `ptr`. When no prefetch primitive is selected they only evaluate
 * `ptr` and discard it.
 *
 * NOTE(review): the MSVC branch tests _M_I86; Microsoft documents _M_IX86 for
 * 32-bit x86 targets, so this branch may never be taken there - confirm
 * before changing. */
#if defined(NO_PREFETCH)
#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
#else
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
#    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#  elif defined(__aarch64__)
#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#  else
#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
#  endif
#endif  /* NO_PREFETCH */

/* Stride, in bytes, used by PREFETCH_AREA between two prefetch hints. */
#define CACHELINE_SIZE 64

/* PREFETCH_AREA(p, s) :
 * issues PREFETCH_L2() on the `s` bytes starting at `p`, one hint every
 * CACHELINE_SIZE bytes beginning at offset 0. Nothing is issued when `s` is 0.
 * `p` and `s` are each evaluated exactly once.
 * The expansion is a brace-enclosed compound statement (not do/while), so it
 * must not be used as the unbraced body of an `if` that has an `else`. */
#define PREFETCH_AREA(p, s)  {            \
    const char* const _ptr = (const char*)(p);  \
    size_t const _size = (size_t)(s);     \
    size_t _pos;                          \
    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
        PREFETCH_L2(_ptr + _pos);         \
    }                                     \
}

/* vectorization
 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax
 *
 * DONT_VECTORIZE asks real GCC (not clang, not the Intel compiler) to disable
 * tree vectorization; it expands to nothing on every other compiler. */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#  else
#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
#  endif
#else
#  define DONT_VECTORIZE
#endif

/* Tell the compiler that a branch is likely or unlikely.
 * Only use these macros if it causes the compiler to generate better code.
 * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
 * and clang, please do.
 *
 * Both macros yield the value of (x); without __GNUC__ they are plain
 * pass-throughs.
 */
#if defined(__GNUC__)
#  define LIKELY(x)   (__builtin_expect((x), 1))
#  define UNLIKELY(x) (__builtin_expect((x), 0))
#else
#  define LIKELY(x)   (x)
#  define UNLIKELY(x) (x)
#endif

/* disable warnings
 * Applies to MSVC only: pulls in <intrin.h> and silences a fixed set of
 * warnings for every file that includes this header. */
#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
#endif

/* Like DYNAMIC_BMI2 but for compile time determination of BMI2 support.
 *
 * The value can be forced by defining STATIC_BMI2 before this point.
 * Otherwise it is 1 only for MSVC x86/x64 builds that define __AVX2__,
 * and 0 everywhere else.
 *
 * NOTE(review): the 32-bit test uses _M_I86; Microsoft documents _M_IX86 for
 * 32-bit x86 targets - confirm before changing. */
#ifndef STATIC_BMI2
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && defined(__AVX2__)
     /* MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 */
#    define STATIC_BMI2 1
#  else
#    define STATIC_BMI2 0
#  endif
#endif

198
Nick Terrell565e9252017-08-14 17:20:50 -0700199#endif /* ZSTD_COMPILER_H */