blob: bed0ce908c305f9a488ef81051b19add72a661c5 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <pthread.h>
7#include <sched.h>
Marat Dukhan452662b2019-10-03 00:14:39 -07008#ifdef __ANDROID__
9 #include <malloc.h>
10#endif
Marat Dukhan4a4a7fa2019-10-21 13:46:14 -070011#if defined(__SSE__) || defined(__x86_64__)
12 #include <xmmintrin.h>
13#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070014
15#include <cstdio>
16#include <cstdlib>
17#include <cstring>
18
19#include <cpuinfo.h>
20
21#include "bench/utils.h"
22
23
// Buffer that WipeCache() reads through; allocated and filled by InitWipeBuffer().
static void* wipe_buffer = nullptr;
// Size of wipe_buffer, in bytes.
static size_t wipe_buffer_size = 0;

// Guard passed to pthread_once() so that InitWipeBuffer() runs only once.
static pthread_once_t wipe_buffer_guard = PTHREAD_ONCE_INIT;
XNNPACK Teamb455b122019-09-27 18:10:33 -070028
Marat Dukhan42323232019-10-23 02:09:02 -070029static void InitWipeBuffer() {
XNNPACK Teamb455b122019-09-27 18:10:33 -070030 // Default: the largest know cache size (128 MB Intel Crystalwell L4 cache).
31 wipe_buffer_size = 128 * 1024 * 1024;
32 if (cpuinfo_initialize()) {
Marat Dukhand62f3cc2019-10-01 12:37:52 -070033 wipe_buffer_size = benchmark::utils::GetMaxCacheSize();
XNNPACK Teamb455b122019-09-27 18:10:33 -070034 }
35#if defined(__ANDROID__)
36 // memalign is obsolete, but it is the only option on Android until API level 17.
37 wipe_buffer = memalign(128, wipe_buffer_size);
Yasuhiro Matsumoto462be052020-02-29 13:41:14 +090038#elif defined(_WIN32)
39 wipe_buffer = _aligned_malloc(wipe_buffer_size, 128);
XNNPACK Teamb455b122019-09-27 18:10:33 -070040#else
41 (void) posix_memalign((void**) &wipe_buffer, 128, wipe_buffer_size);
42#endif
43 if (wipe_buffer != nullptr) {
44 memset(wipe_buffer, 0xA5, wipe_buffer_size);
45 }
46}
47
48namespace benchmark {
49namespace utils {
50
Marat Dukhan42323232019-10-23 02:09:02 -070051uint32_t PrefetchToL1(const void* ptr, size_t size) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070052 uint32_t step = 16;
53 if (cpuinfo_initialize()) {
54 step = cpuinfo_get_l1d_cache(0)->line_size;
55 }
56 const uint8_t* u8_ptr = static_cast<const uint8_t*>(ptr);
57 // Compute and return sum of data to prevent compiler from removing data reads.
58 uint32_t sum = 0;
59 while (size >= step) {
60 sum += uint32_t(*u8_ptr);
61 u8_ptr += step;
62 size -= step;
63 }
64 return sum;
65}
66
Marat Dukhan42323232019-10-23 02:09:02 -070067uint32_t WipeCache() {
68 pthread_once(&wipe_buffer_guard, &InitWipeBuffer);
69 return PrefetchToL1(wipe_buffer, wipe_buffer_size);
XNNPACK Teamb455b122019-09-27 18:10:33 -070070}
71
// Sets the floating-point control bits of the calling thread so that denormal
// numbers are flushed to zero. Does nothing on architectures that match none
// of the branches below.
void DisableDenormals() {
#if defined(__SSE__) || defined(__x86_64__)
  // MXCSR: bit 15 is flush-to-zero, bit 6 is denormals-are-zero.
  const unsigned int kMxcsrFlushToZero = 0x8000;
  const unsigned int kMxcsrDenormalsAreZero = 0x0040;
  const unsigned int mxcsr = _mm_getcsr();
  _mm_setcsr(mxcsr | kMxcsrFlushToZero | kMxcsrDenormalsAreZero);
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
  // Read FPSCR, set bit 24 (0x1000000), write it back.
  uint32_t fpscr;
  __asm__ __volatile__(
    "VMRS %[fpscr], fpscr\n"
    "ORR %[fpscr], #0x1000000\n"
    "VMSR fpscr, %[fpscr]\n"
    : [fpscr] "=r" (fpscr));
#elif defined(__aarch64__)
  // Read FPCR, set bits 24 (0x1000000) and 19 (0x80000), write it back.
  uint64_t fpcr;
  __asm__ __volatile__(
    "MRS %[fpcr], fpcr\n"
    "ORR %w[fpcr], %w[fpcr], 0x1000000\n"
    "ORR %w[fpcr], %w[fpcr], 0x80000\n"
    "MSR fpcr, %[fpcr]\n"
    : [fpcr] "=r" (fpcr));
#endif
}
92
// Returns the current clock rate, in Hz, of the CPU the calling thread is
// running on, or 0 when it cannot be determined.
//
// On Linux the value is read from the cpufreq scaling_cur_freq sysfs file and
// multiplied by 1000; on other platforms the function always returns 0.
uint64_t GetCurrentCpuFrequency() {
#ifdef __linux__
  const int cpu = sched_getcpu();
  if (cpu < 0) {
    // sched_getcpu() failed; there is no valid CPU index to query.
    return 0;
  }

  char cpuinfo_name[512];
  snprintf(cpuinfo_name, sizeof(cpuinfo_name),
    "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu);

  FILE* f = fopen(cpuinfo_name, "r");
  if (f != nullptr) {
    long long freq = 0;
    // fscanf returns the number of converted fields, or EOF (a non-zero value)
    // on failure, so the result must be compared against 1 explicitly.
    const int parsed_fields = fscanf(f, "%lld", &freq);
    fclose(f);
    if (parsed_fields == 1 && freq > 0) {
      return uint64_t(freq) * 1000;
    }
  }
#endif  // __linux__
  return 0;
}
113
Marat Dukhand62f3cc2019-10-01 12:37:52 -0700114size_t GetMaxCacheSize() {
115 if (!cpuinfo_initialize()) {
116 #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
117 // DynamIQ max: 4 MB
118 return 4 * 1024 * 1024;
119 #else
120 // Intel eDRAM max: 128 MB
121 return 128 * 1024 * 1024;
122 #endif
123 }
Marat Dukhan9fd7e252020-03-08 19:33:44 -0700124 return cpuinfo_get_max_cache_size();
Marat Dukhand62f3cc2019-10-01 12:37:52 -0700125}
126
Marat Dukhanbad48fe2019-11-04 10:35:22 -0800127void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark) {
128 benchmark->ArgName("T");
129
130 // Disabled thread pool (execution on the caller thread only).
131 benchmark->Arg(1);
132
133 if (cpuinfo_initialize()) {
134 // All cores except the little ones.
135 uint32_t max_cores = cpuinfo_get_cores_count();
136 if (cpuinfo_get_clusters_count() > 1) {
137 max_cores -= cpuinfo_get_cluster(cpuinfo_get_clusters_count() - 1)->core_count;
138 }
139 for (uint32_t t = 2; t <= max_cores; t++) {
140 benchmark->Arg(t);
141 }
142
143 // All cores (if more than one cluster).
144 if (cpuinfo_get_cores_count() > max_cores) {
145 benchmark->Arg(cpuinfo_get_cores_count());
146 }
147
148 // All cores + hyperthreads (only if hyperthreading supported).
149 if (cpuinfo_get_processors_count() > cpuinfo_get_cores_count()) {
150 benchmark->Arg(cpuinfo_get_processors_count());
151 }
152 }
153}
154
Marat Dukhanc8466f52019-11-25 18:01:10 -0800155
156bool CheckNEON(benchmark::State& state) {
157 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
158 state.SkipWithError("no NEON extension");
159 return false;
160 }
161 return true;
162}
163
164bool CheckNEONFMA(benchmark::State& state) {
165 if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_fma()) {
166 state.SkipWithError("no NEON-FMA extension");
167 return false;
168 }
169 return true;
170}
171
172bool CheckSSE41(benchmark::State& state) {
173 if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
174 state.SkipWithError("no SSE4.1 extension");
175 return false;
176 }
177 return true;
178}
179
180bool CheckAVX(benchmark::State& state) {
181 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx()) {
182 state.SkipWithError("no AVX extension");
183 return false;
184 }
185 return true;
186}
187
188bool CheckFMA3(benchmark::State& state) {
189 if (!cpuinfo_initialize() || !cpuinfo_has_x86_fma3()) {
190 state.SkipWithError("no FMA3 extension");
191 return false;
192 }
193 return true;
194}
195
196bool CheckAVX2(benchmark::State& state) {
197 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx2()) {
198 state.SkipWithError("no AVX2 extension");
199 return false;
200 }
201 return true;
202}
203
204bool CheckAVX512F(benchmark::State& state) {
205 if (!cpuinfo_initialize() || !cpuinfo_has_x86_avx512f()) {
206 state.SkipWithError("no AVX512F extension");
207 return false;
208 }
209 return true;
210}
211
XNNPACK Teamb455b122019-09-27 18:10:33 -0700212} // namespace utils
213} // namespace benchmark