// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
| 5 | |
| 6 | #pragma once |
| 7 | |
| 8 | #include <cstddef> |
| 9 | #include <cstdint> |
| 10 | |
Marat Dukhan | bad48fe | 2019-11-04 10:35:22 -0800 | [diff] [blame] | 11 | #include <benchmark/benchmark.h> |
| 12 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 13 | namespace benchmark { |
| 14 | namespace utils { |
| 15 | |
// NOTE(review): the implementation is not visible in this header. Presumably
// evicts benchmark data from the CPU caches so the next measurement starts
// cold -- confirm against the definition. The meaning of the returned
// uint32_t is not documented here.
uint32_t WipeCache();
// NOTE(review): the implementation is not visible in this header. Presumably
// touches the `size` bytes starting at `ptr` so that they are resident in the
// L1 cache before timing begins -- confirm against the definition. The meaning
// of the returned uint32_t is not documented here.
uint32_t PrefetchToL1(const void* ptr, size_t size);
Marat Dukhan | d62f3cc | 2019-10-01 12:37:52 -0700 | [diff] [blame] | 18 | |
// Disable support for denormalized numbers in floating-point units.
// NOTE(review): the scope of the change (calling thread vs. whole process) is
// not stated in this header; check the definition before relying on it.
void DisableDenormals();
| 21 | |
// Return the clock rate, in Hz, of the logical processor currently in use.
uint64_t GetCurrentCpuFrequency();
| 24 | |
// Return the maximum last-level cache (LLC) size across all
// cores/clusters/sockets.
// The result can overestimate, but never underestimate, the LLC size.
size_t GetMaxCacheSize();
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 28 | |
Marat Dukhan | bad48fe | 2019-11-04 10:35:22 -0800 | [diff] [blame] | 29 | // Set multi-threading parameters appropriate for the processor. |
| 30 | void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark); |
| 31 | |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 32 | typedef bool (*IsaCheckFunction)(benchmark::State& state); |
| 33 | |
Marat Dukhan | 3b98f6b | 2020-05-17 10:09:22 -0700 | [diff] [blame] | 34 | // Check if either ARM VFPv2 or VFPv3 extension is supported. |
| 35 | // If VFP is unsupported, report error in benchmark state, and return false. |
| 36 | bool CheckVFP(benchmark::State& state); |
| 37 | |
Frank Barchard | 40f50e1 | 2020-05-29 22:21:56 -0700 | [diff] [blame] | 38 | // Check if ARM NEON-FP16-ARITH extension is supported. |
| 39 | // If NEON-FP16-ARITH is unsupported, report error in benchmark state, and return false. |
| 40 | bool CheckNEONFP16ARITH(benchmark::State& state); |
| 41 | |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 42 | // Check if ARM NEON extension is supported. |
| 43 | // If NEON is unsupported, report error in benchmark state, and return false. |
| 44 | bool CheckNEON(benchmark::State& state); |
| 45 | |
| 46 | // Check if ARM NEON-FMA extension is supported. |
| 47 | // If NEON-FMA is unsupported, report error in benchmark state, and return false. |
| 48 | bool CheckNEONFMA(benchmark::State& state); |
| 49 | |
Benoit Jacob | a964473 | 2020-08-13 12:48:55 -0700 | [diff] [blame] | 50 | // Check if ARM DOT extension is supported. |
| 51 | // If DOT is unsupported, report error in benchmark state, and return false. |
| 52 | bool CheckNEONDOT(benchmark::State& state); |
| 53 | |
Marat Dukhan | 1566fee | 2020-08-02 21:55:41 -0700 | [diff] [blame] | 54 | // Check if x86 SSSE3 extension is supported. |
| 55 | // If SSSE3 is unsupported, report error in benchmark state, and return false. |
| 56 | bool CheckSSSE3(benchmark::State& state); |
| 57 | |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 58 | // Check if x86 SSE4.1 extension is supported. |
| 59 | // If SSE4.1 is unsupported, report error in benchmark state, and return false. |
| 60 | bool CheckSSE41(benchmark::State& state); |
| 61 | |
| 62 | // Check if x86 AVX extension is supported. |
| 63 | // If AVX is unsupported, report error in benchmark state, and return false. |
| 64 | bool CheckAVX(benchmark::State& state); |
| 65 | |
Marat Dukhan | 1566fee | 2020-08-02 21:55:41 -0700 | [diff] [blame] | 66 | // Check if x86 XOP extension is supported. |
| 67 | // If XOP is unsupported, report error in benchmark state, and return false. |
| 68 | bool CheckXOP(benchmark::State& state); |
| 69 | |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 70 | // Check if x86 FMA3 extension is supported. |
| 71 | // If FMA3 is unsupported, report error in benchmark state, and return false. |
| 72 | bool CheckFMA3(benchmark::State& state); |
| 73 | |
| 74 | // Check if x86 AVX2 extension is supported. |
| 75 | // If AVX2 is unsupported, report error in benchmark state, and return false. |
| 76 | bool CheckAVX2(benchmark::State& state); |
| 77 | |
| 78 | // Check if x86 AVX512F extension is supported. |
| 79 | // If AVX512F is unsupported, report error in benchmark state, and return false. |
| 80 | bool CheckAVX512F(benchmark::State& state); |
| 81 | |
Marat Dukhan | bb00b1d | 2020-08-10 11:37:23 -0700 | [diff] [blame] | 82 | // Check if x86 SKX-level AVX512 extensions (AVX512F, AVX512CD, AVX512BW, AVX512DQ, and AVX512VL) are supported. |
| 83 | // If SKX-level AVX512 extensions are unsupported, report error in benchmark state, and return false. |
| 84 | bool CheckAVX512SKX(benchmark::State& state); |
| 85 | |
// Divide x by q, adding one to the quotient when the division leaves a
// remainder (i.e. the ceiling of x / q for non-negative operands).
//
// T must support operator/ and operator%. q == 0 is not checked.
// The remainder test is used instead of (x + q - 1) / q, so the intermediate
// sum x + q - 1 is never formed.
template <class T>
inline T DivideRoundUp(T x, T q) {
  return x / q + T(x % q != 0);
}
| 90 | |
// Round x up to a multiple of q: returns q times DivideRoundUp(x, q), which
// is x itself when x is already a multiple of q.
//
// q == 0 is not checked; the multiplication is performed in T's arithmetic
// and is not guarded against overflow.
template <class T>
inline T RoundUp(T x, T q) {
  return q * DivideRoundUp(x, q);
}
| 95 | |
// "Difference or zero": returns a - b when a >= b, and T(0) otherwise, so the
// subtraction is only evaluated when a is at least b.
template <class T>
inline T Doz(T a, T b) {
  return a >= b ? a - b : T(0);
}
| 100 | |
| 101 | } // namespace utils |
| 102 | } // namespace benchmark |