XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 1 | // Copyright 2019 Google LLC |
| 2 | // |
| 3 | // This source code is licensed under the BSD-style license found in the |
| 4 | // LICENSE file in the root directory of this source tree. |
| 5 | |
| 6 | #include <algorithm> |
| 7 | #include <cmath> |
| 8 | #include <functional> |
| 9 | #include <random> |
| 10 | #include <vector> |
| 11 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 12 | #include <benchmark/benchmark.h> |
Frank Barchard | bb4c18b | 2019-09-30 11:05:52 -0700 | [diff] [blame] | 13 | #include "bench/utils.h" |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 14 | #include <xnnpack/AlignedAllocator.h> |
Marat Dukhan | 1dadbf7 | 2019-10-01 10:46:20 -0700 | [diff] [blame] | 15 | #include <xnnpack/common.h> |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 16 | #include <xnnpack/params.h> |
| 17 | #include <xnnpack/rmax.h> |
| 18 | |
| 19 | |
| 20 | static void f32_rmax( |
| 21 | benchmark::State& state, |
| 22 | xnn_f32_rmax_ukernel_function f32_rmax) |
| 23 | { |
| 24 | const size_t n = state.range(0); |
| 25 | |
| 26 | std::random_device random_device; |
| 27 | auto rng = std::mt19937(random_device()); |
| 28 | auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), rng); |
| 29 | |
| 30 | std::vector<float, AlignedAllocator<float, 64>> x(n); |
| 31 | std::generate(x.begin(), x.end(), std::ref(f32rng)); |
| 32 | |
| 33 | float y; |
| 34 | for (auto _ : state) { |
| 35 | f32_rmax(n * sizeof(float), x.data(), &y); |
| 36 | } |
| 37 | |
Frank Barchard | bb4c18b | 2019-09-30 11:05:52 -0700 | [diff] [blame] | 38 | state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency(); |
| 39 | |
| 40 | state.counters["elements"] = |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 41 | benchmark::Counter(uint64_t(state.iterations()) * n, benchmark::Counter::kIsRate); |
| 42 | |
| 43 | state.counters["bytes"] = |
| 44 | benchmark::Counter(uint64_t(state.iterations()) * n * sizeof(float), benchmark::Counter::kIsRate); |
| 45 | } |
| 46 | |
Marat Dukhan | 1dadbf7 | 2019-10-01 10:46:20 -0700 | [diff] [blame] | 47 | #if XNN_ARCH_X86 || XNN_ARCH_X86_64 |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 48 | BENCHMARK_CAPTURE(f32_rmax, sse, xnn_f32_rmax_ukernel__sse) |
| 49 | ->RangeMultiplier(10) |
| 50 | ->Range(1000, 100000000) |
| 51 | ->UseRealTime(); |
| 52 | |
| 53 | BENCHMARK_CAPTURE(f32_rmax, avx, xnn_f32_rmax_ukernel__avx) |
| 54 | ->RangeMultiplier(10) |
| 55 | ->Range(1000, 100000000) |
| 56 | ->UseRealTime(); |
| 57 | |
| 58 | BENCHMARK_CAPTURE(f32_rmax, avx512f, xnn_f32_rmax_ukernel__avx512f) |
| 59 | ->RangeMultiplier(10) |
| 60 | ->Range(1000, 100000000) |
| 61 | ->UseRealTime(); |
Marat Dukhan | 1dadbf7 | 2019-10-01 10:46:20 -0700 | [diff] [blame] | 62 | #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 63 | |
Marat Dukhan | 1dadbf7 | 2019-10-01 10:46:20 -0700 | [diff] [blame] | 64 | #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 65 | BENCHMARK_CAPTURE(f32_rmax, neon, xnn_f32_rmax_ukernel__neon) |
| 66 | ->RangeMultiplier(10) |
| 67 | ->Range(1000, 100000000) |
| 68 | ->UseRealTime(); |
Marat Dukhan | 1dadbf7 | 2019-10-01 10:46:20 -0700 | [diff] [blame] | 69 | #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 70 | BENCHMARK_CAPTURE(f32_rmax, scalar, xnn_f32_rmax_ukernel__scalar) |
| 71 | ->RangeMultiplier(10) |
| 72 | ->Range(1000, 100000000) |
| 73 | ->UseRealTime(); |
| 74 | |
| 75 | #ifndef XNNPACK_BENCHMARK_NO_MAIN |
| 76 | BENCHMARK_MAIN(); |
| 77 | #endif |