blob: a8b3cba940ef04f05faa2d236a53c4de4bf4717a [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
Marat Dukhan9fe932e2020-04-11 17:14:15 -07006#include <cstdio>
7#include <cstdlib>
8#include <cstring>
9#include <mutex>
10
11#ifdef __linux__
12 #include <sched.h>
13#endif
Marat Dukhan78583c72020-04-26 22:00:43 -070014#if defined(__ANDROID__) || defined(_WIN32) || defined(__CYGWIN__)
Marat Dukhan452662b2019-10-03 00:14:39 -070015 #include <malloc.h>
16#endif
Marat Dukhan4a4a7fa2019-10-21 13:46:14 -070017#if defined(__SSE__) || defined(__x86_64__)
18 #include <xmmintrin.h>
19#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070020
XNNPACK Teamb455b122019-09-27 18:10:33 -070021#include <cpuinfo.h>
22
23#include "bench/utils.h"
24
25
26static void* wipe_buffer = nullptr;
27static size_t wipe_buffer_size = 0;
28
Marat Dukhan9fe932e2020-04-11 17:14:15 -070029static std::once_flag wipe_buffer_guard;
XNNPACK Teamb455b122019-09-27 18:10:33 -070030
Marat Dukhan42323232019-10-23 02:09:02 -070031static void InitWipeBuffer() {
XNNPACK Teamb455b122019-09-27 18:10:33 -070032 // Default: the largest know cache size (128 MB Intel Crystalwell L4 cache).
33 wipe_buffer_size = 128 * 1024 * 1024;
34 if (cpuinfo_initialize()) {
Marat Dukhand62f3cc2019-10-01 12:37:52 -070035 wipe_buffer_size = benchmark::utils::GetMaxCacheSize();
XNNPACK Teamb455b122019-09-27 18:10:33 -070036 }
Marat Dukhan78583c72020-04-26 22:00:43 -070037#if defined(_WIN32)
38 wipe_buffer = _aligned_malloc(wipe_buffer_size, 128);
39#elif defined(__ANDROID__) || defined(__CYGWIN__)
XNNPACK Teamb455b122019-09-27 18:10:33 -070040 // memalign is obsolete, but it is the only option on Android until API level 17.
41 wipe_buffer = memalign(128, wipe_buffer_size);
42#else
43 (void) posix_memalign((void**) &wipe_buffer, 128, wipe_buffer_size);
44#endif
45 if (wipe_buffer != nullptr) {
46 memset(wipe_buffer, 0xA5, wipe_buffer_size);
47 }
48}
49
50namespace benchmark {
51namespace utils {
52
Marat Dukhan42323232019-10-23 02:09:02 -070053uint32_t PrefetchToL1(const void* ptr, size_t size) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070054 uint32_t step = 16;
55 if (cpuinfo_initialize()) {
56 step = cpuinfo_get_l1d_cache(0)->line_size;
57 }
58 const uint8_t* u8_ptr = static_cast<const uint8_t*>(ptr);
59 // Compute and return sum of data to prevent compiler from removing data reads.
60 uint32_t sum = 0;
61 while (size >= step) {
62 sum += uint32_t(*u8_ptr);
63 u8_ptr += step;
64 size -= step;
65 }
66 return sum;
67}
68
Marat Dukhan42323232019-10-23 02:09:02 -070069uint32_t WipeCache() {
Marat Dukhan9fe932e2020-04-11 17:14:15 -070070 std::call_once(wipe_buffer_guard, InitWipeBuffer);
Marat Dukhan42323232019-10-23 02:09:02 -070071 return PrefetchToL1(wipe_buffer, wipe_buffer_size);
XNNPACK Teamb455b122019-09-27 18:10:33 -070072}
73
// Sets the flush-denormals control bits in the floating-point control
// register of the calling thread's CPU state.
//
// Does nothing on architectures other than x86 with SSE, 32-bit ARM with a
// hardware FPU, and AArch64.
void DisableDenormals() {
#if defined(__SSE__) || defined(__x86_64__)
  // MXCSR: 0x8000 is flush-to-zero (FTZ), 0x0040 is denormals-are-zero (DAZ).
  _mm_setcsr(_mm_getcsr() | 0x8040);
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
  uint32_t fpscr;
  #if defined(__thumb__) && !defined(__thumb2__)
    // The mask is passed in a register because this ORRS form has no
    // immediate operand. The output is early-clobber ("&"): VMRS writes it
    // before ORRS reads `bitmask`, so the two must not share a register.
    __asm__ __volatile__(
      "VMRS %[fpscr], fpscr\n"
      "ORRS %[fpscr], %[bitmask]\n"
      "VMSR fpscr, %[fpscr]\n"
      : [fpscr] "=&l" (fpscr)
      : [bitmask] "l" (0x1000000)
      : "cc");
  #else
    // FPSCR bit 24 (0x1000000) is the flush-to-zero (FZ) control.
    __asm__ __volatile__(
      "VMRS %[fpscr], fpscr\n"
      "ORR %[fpscr], #0x1000000\n"
      "VMSR fpscr, %[fpscr]\n"
      : [fpscr] "=r" (fpscr));
  #endif
#elif defined(__aarch64__)
  uint64_t fpcr;
  // FPCR bit 24 (0x1000000) is FZ; bit 19 (0x80000) is FZ16, the
  // half-precision flush-to-zero control.
  __asm__ __volatile__(
    "MRS %[fpcr], fpcr\n"
    "ORR %w[fpcr], %w[fpcr], 0x1000000\n"
    "ORR %w[fpcr], %w[fpcr], 0x80000\n"
    "MSR fpcr, %[fpcr]\n"
    : [fpcr] "=r" (fpcr));
#endif
}
104
// Returns the current clock rate, in Hz, of the CPU the calling thread is
// running on.
//
// The value is read from the Linux cpufreq `scaling_cur_freq` sysfs file and
// multiplied by 1000. Returns 0 on non-Linux platforms, or when the CPU index
// cannot be determined, the file cannot be opened, or it does not contain a
// positive integer.
uint64_t GetCurrentCpuFrequency() {
#ifdef __linux__
  const int cpu = sched_getcpu();
  if (cpu < 0) {
    return 0;
  }

  char cpuinfo_name[512];
  snprintf(cpuinfo_name, sizeof(cpuinfo_name),
           "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu);

  FILE* f = fopen(cpuinfo_name, "r");
  if (f != nullptr) {
    int freq = 0;
    // fscanf returns the number of converted fields, or EOF (which is
    // non-zero) on a read failure, so compare against 1 explicitly.
    const bool parsed = fscanf(f, "%d", &freq) == 1;
    fclose(f);
    if (parsed && freq > 0) {
      return uint64_t(freq) * 1000;
    }
  }
#endif  // __linux__
  return 0;
}
125
Marat Dukhand62f3cc2019-10-01 12:37:52 -0700126size_t GetMaxCacheSize() {
127 if (!cpuinfo_initialize()) {
128 #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
129 // DynamIQ max: 4 MB
130 return 4 * 1024 * 1024;
131 #else
132 // Intel eDRAM max: 128 MB
133 return 128 * 1024 * 1024;
134 #endif
135 }
Marat Dukhan9fd7e252020-03-08 19:33:44 -0700136 return cpuinfo_get_max_cache_size();
Marat Dukhand62f3cc2019-10-01 12:37:52 -0700137}
138
Marat Dukhanbad48fe2019-11-04 10:35:22 -0800139void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark) {
140 benchmark->ArgName("T");
141
142 // Disabled thread pool (execution on the caller thread only).
143 benchmark->Arg(1);
144
145 if (cpuinfo_initialize()) {
146 // All cores except the little ones.
147 uint32_t max_cores = cpuinfo_get_cores_count();
148 if (cpuinfo_get_clusters_count() > 1) {
149 max_cores -= cpuinfo_get_cluster(cpuinfo_get_clusters_count() - 1)->core_count;
150 }
151 for (uint32_t t = 2; t <= max_cores; t++) {
152 benchmark->Arg(t);
153 }
154
155 // All cores (if more than one cluster).
156 if (cpuinfo_get_cores_count() > max_cores) {
157 benchmark->Arg(cpuinfo_get_cores_count());
158 }
159
160 // All cores + hyperthreads (only if hyperthreading supported).
161 if (cpuinfo_get_processors_count() > cpuinfo_get_cores_count()) {
162 benchmark->Arg(cpuinfo_get_processors_count());
163 }
164 }
165}
166
Marat Dukhanc8466f52019-11-25 18:01:10 -0800167
Marat Dukhan3b98f6b2020-05-17 10:09:22 -0700168bool CheckVFP(benchmark::State& state) {
169 if (!cpuinfo_initialize() || !(cpuinfo_has_arm_vfpv2() || cpuinfo_has_arm_vfpv3())) {
170 state.SkipWithError("no VFP extension");
171 return false;
172 }
173 return true;
174}
175
Frank Barchard40f50e12020-05-29 22:21:56 -0700176bool CheckNEONFP16ARITH(benchmark::State& state) {
177 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_fp16_arith()) {
178 state.SkipWithError("no NEON-FP16-ARITH extension");
179 return false;
180 }
181 return true;
182}
183
Marat Dukhanc8466f52019-11-25 18:01:10 -0800184bool CheckNEON(benchmark::State& state) {
185 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
186 state.SkipWithError("no NEON extension");
187 return false;
188 }
189 return true;
190}
191
192bool CheckNEONFMA(benchmark::State& state) {
193 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_fma()) {
194 state.SkipWithError("no NEON-FMA extension");
195 return false;
196 }
197 return true;
198}
199
Benoit Jacoba9644732020-08-13 12:48:55 -0700200bool CheckNEONDOT(benchmark::State& state) {
201 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_dot()) {
202 state.SkipWithError("no NEON-DOT extension");
203 return false;
204 }
205 return true;
206}
207
Marat Dukhan1566fee2020-08-02 21:55:41 -0700208bool CheckSSSE3(benchmark::State& state) {
209 if (!cpuinfo_initialize() || !cpuinfo_has_x86_ssse3()) {
210 state.SkipWithError("no SSSE3 extension");
211 return false;
212 }
213 return true;
214}
215
Marat Dukhanc8466f52019-11-25 18:01:10 -0800216bool CheckSSE41(benchmark::State& state) {
217 if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
218 state.SkipWithError("no SSE4.1 extension");
219 return false;
220 }
221 return true;
222}
223
224bool CheckAVX(benchmark::State& state) {
225 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx()) {
226 state.SkipWithError("no AVX extension");
227 return false;
228 }
229 return true;
230}
231
Marat Dukhan1566fee2020-08-02 21:55:41 -0700232bool CheckXOP(benchmark::State& state) {
233 if (!cpuinfo_initialize() || !cpuinfo_has_x86_xop()) {
234 state.SkipWithError("no XOP extension");
235 return false;
236 }
237 return true;
238}
239
Marat Dukhanc8466f52019-11-25 18:01:10 -0800240bool CheckFMA3(benchmark::State& state) {
241 if (!cpuinfo_initialize() || !cpuinfo_has_x86_fma3()) {
242 state.SkipWithError("no FMA3 extension");
243 return false;
244 }
245 return true;
246}
247
248bool CheckAVX2(benchmark::State& state) {
249 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx2()) {
250 state.SkipWithError("no AVX2 extension");
251 return false;
252 }
253 return true;
254}
255
256bool CheckAVX512F(benchmark::State& state) {
257 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx512f()) {
258 state.SkipWithError("no AVX512F extension");
259 return false;
260 }
261 return true;
262}
263
Marat Dukhanbb00b1d2020-08-10 11:37:23 -0700264bool CheckAVX512SKX(benchmark::State& state) {
265 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx512f() ||
266 !cpuinfo_has_x86_avx512cd() || !cpuinfo_has_x86_avx512bw() ||
267 !cpuinfo_has_x86_avx512dq() || !cpuinfo_has_x86_avx512vl())
268 {
269 state.SkipWithError("no AVX512 SKX extensions");
270 return false;
271 }
272 return true;
273}
274
XNNPACK Teamb455b122019-09-27 18:10:33 -0700275} // namespace utils
276} // namespace benchmark