blob: bf1b5572fc9525be57dd56990022b10982644c1c [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
Marat Dukhan9fe932e2020-04-11 17:14:15 -07006#include <cstdio>
7#include <cstdlib>
8#include <cstring>
9#include <mutex>
10
11#ifdef __linux__
12 #include <sched.h>
13#endif
Marat Dukhan78583c72020-04-26 22:00:43 -070014#if defined(__ANDROID__) || defined(_WIN32) || defined(__CYGWIN__)
Marat Dukhan452662b2019-10-03 00:14:39 -070015 #include <malloc.h>
16#endif
Marat Dukhan4a4a7fa2019-10-21 13:46:14 -070017#if defined(__SSE__) || defined(__x86_64__)
18 #include <xmmintrin.h>
19#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070020
XNNPACK Teamb455b122019-09-27 18:10:33 -070021#include <cpuinfo.h>
22
Zhi An Ng717665f2022-01-10 15:59:11 -080023#include <xnnpack.h>
24#include <xnnpack/allocator.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070025
Zhi An Ng717665f2022-01-10 15:59:11 -080026#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070027
// Buffer that WipeCache() reads through; allocated by InitWipeBuffer().
static void* wipe_buffer{nullptr};
// Size of wipe_buffer in bytes; 0 until InitWipeBuffer() has run.
static size_t wipe_buffer_size{0};

// Makes std::call_once run InitWipeBuffer() a single time.
static std::once_flag wipe_buffer_guard;
XNNPACK Teamb455b122019-09-27 18:10:33 -070032
Marat Dukhan42323232019-10-23 02:09:02 -070033static void InitWipeBuffer() {
XNNPACK Teamb455b122019-09-27 18:10:33 -070034 // Default: the largest know cache size (128 MB Intel Crystalwell L4 cache).
35 wipe_buffer_size = 128 * 1024 * 1024;
36 if (cpuinfo_initialize()) {
Marat Dukhand62f3cc2019-10-01 12:37:52 -070037 wipe_buffer_size = benchmark::utils::GetMaxCacheSize();
XNNPACK Teamb455b122019-09-27 18:10:33 -070038 }
Marat Dukhan78583c72020-04-26 22:00:43 -070039#if defined(_WIN32)
40 wipe_buffer = _aligned_malloc(wipe_buffer_size, 128);
41#elif defined(__ANDROID__) || defined(__CYGWIN__)
XNNPACK Teamb455b122019-09-27 18:10:33 -070042 // memalign is obsolete, but it is the only option on Android until API level 17.
43 wipe_buffer = memalign(128, wipe_buffer_size);
44#else
45 (void) posix_memalign((void**) &wipe_buffer, 128, wipe_buffer_size);
46#endif
47 if (wipe_buffer != nullptr) {
48 memset(wipe_buffer, 0xA5, wipe_buffer_size);
49 }
50}
51
52namespace benchmark {
53namespace utils {
54
Marat Dukhan42323232019-10-23 02:09:02 -070055uint32_t PrefetchToL1(const void* ptr, size_t size) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070056 uint32_t step = 16;
57 if (cpuinfo_initialize()) {
58 step = cpuinfo_get_l1d_cache(0)->line_size;
59 }
60 const uint8_t* u8_ptr = static_cast<const uint8_t*>(ptr);
61 // Compute and return sum of data to prevent compiler from removing data reads.
62 uint32_t sum = 0;
63 while (size >= step) {
64 sum += uint32_t(*u8_ptr);
65 u8_ptr += step;
66 size -= step;
67 }
68 return sum;
69}
70
Marat Dukhan42323232019-10-23 02:09:02 -070071uint32_t WipeCache() {
Marat Dukhan9fe932e2020-04-11 17:14:15 -070072 std::call_once(wipe_buffer_guard, InitWipeBuffer);
Marat Dukhan42323232019-10-23 02:09:02 -070073 return PrefetchToL1(wipe_buffer, wipe_buffer_size);
XNNPACK Teamb455b122019-09-27 18:10:33 -070074}
75
// Sets the floating-point control bits of the calling thread so that denormal
// numbers are flushed to zero. Does nothing on architectures not listed below.
void DisableDenormals() {
#if defined(__SSE__) || defined(__x86_64__)
  // MXCSR bit 15 (0x8000, flush-to-zero) and bit 6 (0x0040, denormals-are-zero).
  const unsigned int denormal_bits = 0x8000u | 0x0040u;
  const unsigned int current_csr = _mm_getcsr();
  _mm_setcsr(current_csr | denormal_bits);
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
  // Set bit 24 (0x1000000) of FPSCR.
  uint32_t fpscr;
  #if defined(__thumb__) && !defined(__thumb2__)
    // Thumb-1 variant: the bitmask is passed in a low register.
    __asm__ __volatile__(
      "VMRS %[fpscr], fpscr\n"
      "ORRS %[fpscr], %[bitmask]\n"
      "VMSR fpscr, %[fpscr]\n"
      : [fpscr] "=l" (fpscr)
      : [bitmask] "l" (0x1000000)
      : "cc");
  #else
    __asm__ __volatile__(
      "VMRS %[fpscr], fpscr\n"
      "ORR %[fpscr], #0x1000000\n"
      "VMSR fpscr, %[fpscr]\n"
      : [fpscr] "=r" (fpscr));
  #endif
#elif defined(__aarch64__)
  // Set bit 24 (0x1000000) and bit 19 (0x80000) of FPCR.
  uint64_t fpcr;
  __asm__ __volatile__(
    "MRS %[fpcr], fpcr\n"
    "ORR %w[fpcr], %w[fpcr], 0x1000000\n"
    "ORR %w[fpcr], %w[fpcr], 0x80000\n"
    "MSR fpcr, %[fpcr]\n"
    : [fpcr] "=r" (fpcr));
#endif
}
106
// Returns the current clock rate, in Hz, of the CPU the calling thread is
// running on.
//
// On Linux the value is read from the cpufreq `scaling_cur_freq` sysfs entry
// and multiplied by 1000. Returns 0 on other platforms, or when the current
// CPU cannot be determined, the file cannot be opened, or it does not contain
// a positive integer.
uint64_t GetCurrentCpuFrequency() {
#ifdef __linux__
  const int cpu = sched_getcpu();
  if (cpu < 0) {
    return 0;
  }

  char cpuinfo_name[512];
  snprintf(cpuinfo_name, sizeof(cpuinfo_name),
           "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu);

  FILE* f = fopen(cpuinfo_name, "r");
  if (f != nullptr) {
    int freq = 0;
    // fscanf returns EOF (non-zero) on read failure, so require exactly one
    // converted field instead of testing the result for truthiness.
    const int converted = fscanf(f, "%d", &freq);
    fclose(f);
    if (converted == 1 && freq > 0) {
      return uint64_t(freq) * 1000;
    }
  }
#endif  // __linux__
  return 0;
}
127
Marat Dukhand62f3cc2019-10-01 12:37:52 -0700128size_t GetMaxCacheSize() {
129 if (!cpuinfo_initialize()) {
130 #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
131 // DynamIQ max: 4 MB
132 return 4 * 1024 * 1024;
133 #else
134 // Intel eDRAM max: 128 MB
135 return 128 * 1024 * 1024;
136 #endif
137 }
Marat Dukhan9fd7e252020-03-08 19:33:44 -0700138 return cpuinfo_get_max_cache_size();
Marat Dukhand62f3cc2019-10-01 12:37:52 -0700139}
140
Marat Dukhanbad48fe2019-11-04 10:35:22 -0800141void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark) {
142 benchmark->ArgName("T");
143
144 // Disabled thread pool (execution on the caller thread only).
145 benchmark->Arg(1);
146
147 if (cpuinfo_initialize()) {
148 // All cores except the little ones.
149 uint32_t max_cores = cpuinfo_get_cores_count();
150 if (cpuinfo_get_clusters_count() > 1) {
151 max_cores -= cpuinfo_get_cluster(cpuinfo_get_clusters_count() - 1)->core_count;
152 }
153 for (uint32_t t = 2; t <= max_cores; t++) {
154 benchmark->Arg(t);
155 }
156
157 // All cores (if more than one cluster).
158 if (cpuinfo_get_cores_count() > max_cores) {
159 benchmark->Arg(cpuinfo_get_cores_count());
160 }
161
162 // All cores + hyperthreads (only if hyperthreading supported).
163 if (cpuinfo_get_processors_count() > cpuinfo_get_cores_count()) {
164 benchmark->Arg(cpuinfo_get_processors_count());
165 }
166 }
167}
168
Marat Dukhanc8466f52019-11-25 18:01:10 -0800169
Marat Dukhan3b98f6b2020-05-17 10:09:22 -0700170bool CheckVFP(benchmark::State& state) {
171 if (!cpuinfo_initialize() || !(cpuinfo_has_arm_vfpv2() || cpuinfo_has_arm_vfpv3())) {
172 state.SkipWithError("no VFP extension");
173 return false;
174 }
175 return true;
176}
177
Marat Dukhan434352f2021-10-16 18:28:55 -0700178bool CheckNEON(benchmark::State& state) {
179 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
180 state.SkipWithError("no NEON extension");
Frank Barchard40f50e12020-05-29 22:21:56 -0700181 return false;
182 }
183 return true;
184}
185
Marat Dukhan434352f2021-10-16 18:28:55 -0700186bool CheckNEONFP16(benchmark::State& state) {
187 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_fp16()) {
188 state.SkipWithError("no NEON-FP16 extension");
Marat Dukhanc8466f52019-11-25 18:01:10 -0800189 return false;
190 }
191 return true;
192}
193
194bool CheckNEONFMA(benchmark::State& state) {
195 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_fma()) {
196 state.SkipWithError("no NEON-FMA extension");
197 return false;
198 }
199 return true;
200}
201
Marat Dukhand8e2d712021-07-26 23:35:50 -0700202bool CheckNEONV8(benchmark::State& state) {
203 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
Marat Dukhan563eee12021-12-02 14:44:25 -0800204 state.SkipWithError("no NEON-V8 extension");
Marat Dukhand8e2d712021-07-26 23:35:50 -0700205 return false;
206 }
207 return true;
208}
209
Marat Dukhan434352f2021-10-16 18:28:55 -0700210bool CheckNEONFP16ARITH(benchmark::State& state) {
211 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_fp16_arith()) {
212 state.SkipWithError("no NEON-FP16-ARITH extension");
213 return false;
214 }
215 return true;
216}
217
Benoit Jacoba9644732020-08-13 12:48:55 -0700218bool CheckNEONDOT(benchmark::State& state) {
219 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_dot()) {
220 state.SkipWithError("no NEON-DOT extension");
221 return false;
222 }
223 return true;
224}
225
Marat Dukhan1566fee2020-08-02 21:55:41 -0700226bool CheckSSSE3(benchmark::State& state) {
227 if (!cpuinfo_initialize() || !cpuinfo_has_x86_ssse3()) {
228 state.SkipWithError("no SSSE3 extension");
229 return false;
230 }
231 return true;
232}
233
Marat Dukhanc8466f52019-11-25 18:01:10 -0800234bool CheckSSE41(benchmark::State& state) {
235 if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
236 state.SkipWithError("no SSE4.1 extension");
237 return false;
238 }
239 return true;
240}
241
242bool CheckAVX(benchmark::State& state) {
243 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx()) {
244 state.SkipWithError("no AVX extension");
245 return false;
246 }
247 return true;
248}
249
Marat Dukhan434352f2021-10-16 18:28:55 -0700250bool CheckF16C(benchmark::State& state) {
251 if (!cpuinfo_initialize() || !cpuinfo_has_x86_f16c()) {
252 state.SkipWithError("no F16C extension");
253 return false;
254 }
255 return true;
256}
257
Marat Dukhan1566fee2020-08-02 21:55:41 -0700258bool CheckXOP(benchmark::State& state) {
259 if (!cpuinfo_initialize() || !cpuinfo_has_x86_xop()) {
260 state.SkipWithError("no XOP extension");
261 return false;
262 }
263 return true;
264}
265
Marat Dukhanc8466f52019-11-25 18:01:10 -0800266bool CheckFMA3(benchmark::State& state) {
267 if (!cpuinfo_initialize() || !cpuinfo_has_x86_fma3()) {
268 state.SkipWithError("no FMA3 extension");
269 return false;
270 }
271 return true;
272}
273
274bool CheckAVX2(benchmark::State& state) {
275 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx2()) {
276 state.SkipWithError("no AVX2 extension");
277 return false;
278 }
279 return true;
280}
281
282bool CheckAVX512F(benchmark::State& state) {
283 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx512f()) {
284 state.SkipWithError("no AVX512F extension");
285 return false;
286 }
287 return true;
288}
289
Marat Dukhanbb00b1d2020-08-10 11:37:23 -0700290bool CheckAVX512SKX(benchmark::State& state) {
291 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx512f() ||
292 !cpuinfo_has_x86_avx512cd() || !cpuinfo_has_x86_avx512bw() ||
293 !cpuinfo_has_x86_avx512dq() || !cpuinfo_has_x86_avx512vl())
294 {
295 state.SkipWithError("no AVX512 SKX extensions");
296 return false;
297 }
298 return true;
299}
300
Zhi An Ng717665f2022-01-10 15:59:11 -0800301CodeMemoryHelper::CodeMemoryHelper() {
302 status = xnn_allocate_code_memory(&buffer, XNN_DEFAULT_CODE_BUFFER_SIZE);
303}
304
305CodeMemoryHelper::~CodeMemoryHelper() {
306 if (status == xnn_status_success) {
307 xnn_release_code_memory(&buffer);
308 }
309}
310
XNNPACK Teamb455b122019-09-27 18:10:33 -0700311} // namespace utils
312} // namespace benchmark