// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/params.h>
#include <xnnpack/rmax.h>
18
19
20static void f32_rmax(
21 benchmark::State& state,
Marat Dukhan4e895872020-12-04 15:27:45 -080022 xnn_f32_rmax_ukernel_function f32_rmax,
23 benchmark::utils::IsaCheckFunction isa_check = nullptr)
XNNPACK Teamb455b122019-09-27 18:10:33 -070024{
Marat Dukhan4e895872020-12-04 15:27:45 -080025 if (isa_check && !isa_check(state)) {
26 return;
27 }
28
Marat Dukhand713e8a2020-12-04 14:23:12 -080029 const size_t elements = state.range(0);
XNNPACK Teamb455b122019-09-27 18:10:33 -070030
31 std::random_device random_device;
32 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070033 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070034
Marat Dukhand713e8a2020-12-04 14:23:12 -080035 std::vector<float, AlignedAllocator<float, 64>> x(elements);
XNNPACK Teamb455b122019-09-27 18:10:33 -070036 std::generate(x.begin(), x.end(), std::ref(f32rng));
37
38 float y;
39 for (auto _ : state) {
Marat Dukhand713e8a2020-12-04 14:23:12 -080040 f32_rmax(elements * sizeof(float), x.data(), &y);
XNNPACK Teamb455b122019-09-27 18:10:33 -070041 }
42
Marat Dukhand713e8a2020-12-04 14:23:12 -080043 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
44 if (cpu_frequency != 0) {
45 state.counters["cpufreq"] = cpu_frequency;
46 }
Frank Barchardbb4c18b2019-09-30 11:05:52 -070047
Marat Dukhand713e8a2020-12-04 14:23:12 -080048 const size_t elements_per_iteration = elements;
49 state.counters["elements"] =
50 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
XNNPACK Teamb455b122019-09-27 18:10:33 -070051
Marat Dukhand713e8a2020-12-04 14:23:12 -080052 const size_t bytes_per_iteration = elements * sizeof(float);
XNNPACK Teamb455b122019-09-27 18:10:33 -070053 state.counters["bytes"] =
Marat Dukhand713e8a2020-12-04 14:23:12 -080054 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
XNNPACK Teamb455b122019-09-27 18:10:33 -070055}
56
// x86 / x86-64 kernels. Each variant is run at input sizes from 1,000 to
// 100,000,000 elements in powers of ten and timed with wall-clock time.
// The AVX and AVX512F variants pass an ISA check to the benchmark function;
// the SSE variant is registered without one.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  BENCHMARK_CAPTURE(f32_rmax, sse, xnn_f32_rmax_ukernel__sse)
    ->RangeMultiplier(10)
    ->Range(1000, 100000000)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_rmax, avx, xnn_f32_rmax_ukernel__avx, benchmark::utils::CheckAVX)
    ->RangeMultiplier(10)
    ->Range(1000, 100000000)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_rmax, avx512f, xnn_f32_rmax_ukernel__avx512f, benchmark::utils::CheckAVX512F)
    ->RangeMultiplier(10)
    ->Range(1000, 100000000)
    ->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -070073
// ARM / ARM64 kernel. Run at input sizes from 1,000 to 100,000,000 elements
// in powers of ten, timed with wall-clock time; CheckNEON is passed as the
// ISA check.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  BENCHMARK_CAPTURE(f32_rmax, neon, xnn_f32_rmax_ukernel__neon, benchmark::utils::CheckNEON)
    ->RangeMultiplier(10)
    ->Range(1000, 100000000)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan8c417962020-07-08 12:27:50 -070080
// WebAssembly SIMD kernels (two variants, "arm" and "x86"). Both are
// registered without an ISA check and run at input sizes from 1,000 to
// 100,000,000 elements in powers of ten, timed with wall-clock time.
#if XNN_ARCH_WASMSIMD
  BENCHMARK_CAPTURE(f32_rmax, wasmsimd_arm, xnn_f32_rmax_ukernel__wasmsimd_arm)
    ->RangeMultiplier(10)
    ->Range(1000, 100000000)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_rmax, wasmsimd_x86, xnn_f32_rmax_ukernel__wasmsimd_x86)
    ->RangeMultiplier(10)
    ->Range(1000, 100000000)
    ->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD
92
XNNPACK Teamb455b122019-09-27 18:10:33 -070093BENCHMARK_CAPTURE(f32_rmax, scalar, xnn_f32_rmax_ukernel__scalar)
94 ->RangeMultiplier(10)
95 ->Range(1000, 100000000)
96 ->UseRealTime();
97
98#ifndef XNNPACK_BENCHMARK_NO_MAIN
99BENCHMARK_MAIN();
100#endif