// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include <benchmark/benchmark.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070013#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/AlignedAllocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070015#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070016#include <xnnpack/params.h>
17#include <xnnpack/rmax.h>
18
19
20static void f32_rmax(
21 benchmark::State& state,
22 xnn_f32_rmax_ukernel_function f32_rmax)
23{
24 const size_t n = state.range(0);
25
26 std::random_device random_device;
27 auto rng = std::mt19937(random_device());
28 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), rng);
29
30 std::vector<float, AlignedAllocator<float, 64>> x(n);
31 std::generate(x.begin(), x.end(), std::ref(f32rng));
32
33 float y;
34 for (auto _ : state) {
35 f32_rmax(n * sizeof(float), x.data(), &y);
36 }
37
Frank Barchardbb4c18b2019-09-30 11:05:52 -070038 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
39
40 state.counters["elements"] =
XNNPACK Teamb455b122019-09-27 18:10:33 -070041 benchmark::Counter(uint64_t(state.iterations()) * n, benchmark::Counter::kIsRate);
42
43 state.counters["bytes"] =
44 benchmark::Counter(uint64_t(state.iterations()) * n * sizeof(float), benchmark::Counter::kIsRate);
45}
46
Marat Dukhan1dadbf72019-10-01 10:46:20 -070047#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -070048 BENCHMARK_CAPTURE(f32_rmax, sse, xnn_f32_rmax_ukernel__sse)
49 ->RangeMultiplier(10)
50 ->Range(1000, 100000000)
51 ->UseRealTime();
52
53 BENCHMARK_CAPTURE(f32_rmax, avx, xnn_f32_rmax_ukernel__avx)
54 ->RangeMultiplier(10)
55 ->Range(1000, 100000000)
56 ->UseRealTime();
57
58 BENCHMARK_CAPTURE(f32_rmax, avx512f, xnn_f32_rmax_ukernel__avx512f)
59 ->RangeMultiplier(10)
60 ->Range(1000, 100000000)
61 ->UseRealTime();
Marat Dukhan1dadbf72019-10-01 10:46:20 -070062#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -070063
Marat Dukhan1dadbf72019-10-01 10:46:20 -070064#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070065 BENCHMARK_CAPTURE(f32_rmax, neon, xnn_f32_rmax_ukernel__neon)
66 ->RangeMultiplier(10)
67 ->Range(1000, 100000000)
68 ->UseRealTime();
Marat Dukhan1dadbf72019-10-01 10:46:20 -070069#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070070BENCHMARK_CAPTURE(f32_rmax, scalar, xnn_f32_rmax_ukernel__scalar)
71 ->RangeMultiplier(10)
72 ->Range(1000, 100000000)
73 ->UseRealTime();
74
75#ifndef XNNPACK_BENCHMARK_NO_MAIN
76BENCHMARK_MAIN();
77#endif