blob: 1aebd3dffe6d9da11c191521121585df520cdf71 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <cmath>
9#include <functional>
10#include <random>
11#include <vector>
12
XNNPACK Teamb455b122019-09-27 18:10:33 -070013#include <benchmark/benchmark.h>
14#include "bench/dwconv.h"
15#include "bench/utils.h"
16#include <xnnpack/AlignedAllocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070017#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070018#include <xnnpack/dwconv.h>
19#include <xnnpack/indirection.h>
20#include <xnnpack/operator.h>
21#include <xnnpack/pack.h>
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -070022#include <xnnpack/params-init.h>
Frank Barcharde0601b52019-10-25 17:43:34 -070023#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024
25
26static void DWConvBenchmark(benchmark::State& state,
Marat Dukhan163a7e62020-04-09 04:19:26 -070027 xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,
Marat Dukhan42b441b2021-07-26 17:40:52 -070028 xnn_init_f32_minmax_params_fn init_params,
29 uint32_t channel_tile, uint32_t primary_tile,
Marat Dukhanc8466f52019-11-25 18:01:10 -080030 benchmark::utils::IsaCheckFunction isa_check = nullptr)
XNNPACK Teamb455b122019-09-27 18:10:33 -070031{
Marat Dukhanc8466f52019-11-25 18:01:10 -080032 if (isa_check && !isa_check(state)) {
33 return;
34 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070035
36 const size_t input_height = state.range(0);
37 const size_t input_width = state.range(1);
38 const size_t kernel_height = state.range(2);
39 const size_t kernel_width = state.range(3);
40 const size_t padding_height = state.range(4);
41 const size_t padding_width = state.range(5);
42 const size_t subsampling = state.range(6);
43 const size_t dilation = state.range(7);
44 const size_t channels = state.range(8);
45
46 const size_t kernel_size = kernel_height * kernel_width;
Marat Dukhan42b441b2021-07-26 17:40:52 -070047 if (kernel_size != primary_tile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070048 state.SkipWithError("kernel size mismatch");
49 return;
50 }
51
52 std::random_device random_device;
53 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070054 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070055
56 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
57 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
58 const size_t padding_left = padding_width / 2;
59 const size_t padding_top = padding_height / 2;
60 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
61 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
62 const size_t output_size = output_height * output_width;
63 const size_t step_width = dilation == 1 ? subsampling : kernel_width;
Marat Dukhan03ff2942019-12-05 09:32:26 -080064 const size_t step_height = kernel_size + (output_width - 1) * step_width * kernel_height;
XNNPACK Teamb455b122019-09-27 18:10:33 -070065
Marat Dukhan42b441b2021-07-26 17:40:52 -070066 const size_t c_stride = benchmark::utils::RoundUp<size_t>(channels, channel_tile);
XNNPACK Teamb455b122019-09-27 18:10:33 -070067
Marat Dukhanad74a7b2019-12-05 06:18:39 -080068 std::vector<float> a(channels * input_height * input_width + XNN_EXTRA_BYTES / sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -070069 std::generate(a.begin(), a.end(), std::ref(f32rng));
70 std::vector<float> k(channels * kernel_height * kernel_width);
71 std::generate(k.begin(), k.end(), std::ref(f32rng));
72 std::vector<float> b(channels);
73 std::generate(b.begin(), b.end(), std::ref(f32rng));
74
Marat Dukhanad74a7b2019-12-05 06:18:39 -080075 std::vector<float> z(channels + XNN_EXTRA_BYTES / sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -070076
77 const size_t w_elements = (kernel_size + 1) * c_stride;
78 const size_t i_elements = output_height * step_height;
79 const size_t c_elements = output_size * channels;
80 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070081 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070082 sizeof(float) * (w_elements + c_elements) + sizeof(void*) * i_elements);
83
Marat Dukhane13e6392021-07-26 22:22:35 -070084 std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 std::fill(w.begin(), w.end(), 0.0f);
Marat Dukhan42b441b2021-07-26 17:40:52 -070086 xnn_pack_f32_dwconv_ghw_w(kernel_height, kernel_width, channels, channel_tile,
Marat Dukhan82286892021-06-04 17:27:27 -070087 k.data(), b.data(), w.data(), 0 /* extra bytes */, nullptr);
XNNPACK Teamb455b122019-09-27 18:10:33 -070088 for (size_t n = 1; n < num_buffers; n++) {
89 std::copy(w.cbegin(), w.cbegin() + w_elements, w.begin() + n * w_elements);
90 }
91
92 std::vector<const float*> i(i_elements * num_buffers);
93 xnn_operator convolution_op = { };
94 convolution_op.indirection_buffer = reinterpret_cast<const void**>(i.data());
95 convolution_op.input = a.data();
96 convolution_op.input_pixel_stride = channels;
97 convolution_op.zero_buffer = z.data();
XNNPACK Teamb455b122019-09-27 18:10:33 -070098 convolution_op.input_height = input_height;
99 convolution_op.input_width = input_width;
100 convolution_op.output_height = output_height;
101 convolution_op.output_width = output_width;
102 convolution_op.kernel_height = kernel_height;
103 convolution_op.kernel_width = kernel_width;
104 convolution_op.stride_height = subsampling;
105 convolution_op.stride_width = subsampling;
106 convolution_op.dilation_height = dilation;
107 convolution_op.dilation_width = dilation;
108 convolution_op.padding_top = padding_top;
109 convolution_op.padding_left = padding_left;
110
Marat Dukhanc79427c2020-10-15 09:04:21 -0700111 xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 2 /* log2(sizeof(float)) */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700112 for (size_t n = 1; n < num_buffers; n++) {
113 std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
114 }
115
116 std::vector<float> c(c_elements * num_buffers);
117 std::fill(c.begin(), c.end(), std::nanf(""));
118
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700119 xnn_f32_minmax_params params;
Marat Dukhan42b441b2021-07-26 17:40:52 -0700120 init_params(&params, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700121
122 size_t buffer_index = 0;
123 for (auto _ : state) {
124 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700125 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700126 buffer_index = (buffer_index + 1) % num_buffers;
127 state.ResumeTiming();
128
Marat Dukhanc79427c2020-10-15 09:04:21 -0700129 for (size_t y = 0; y < output_height; y++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700130 dwconv(channels, output_width,
131 i.data() + buffer_index * i_elements + step_height * y,
132 w.data() + buffer_index * w_elements,
133 c.data() + buffer_index * c_elements + y * output_width * channels,
134 kernel_height * step_width * sizeof(void*), 0,
Frank Barchardd5360722020-05-17 16:10:36 -0700135 0, z.data(), &params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700136 }
137 }
138
Marat Dukhand713e8a2020-12-04 14:23:12 -0800139 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
140 if (cpu_frequency != 0) {
141 state.counters["cpufreq"] = cpu_frequency;
142 }
143
XNNPACK Teamb455b122019-09-27 18:10:33 -0700144 state.counters["FLOPS"] = benchmark::Counter(
145 uint64_t(state.iterations()) * 2 * output_size * channels * kernel_size,
146 benchmark::Counter::kIsRate);
147
Marat Dukhand713e8a2020-12-04 14:23:12 -0800148 state.counters["bytes"] = benchmark::Counter(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700149 uint64_t(state.iterations()) * (output_size + input_height * input_width + kernel_size + 1 /* bias */) * channels * sizeof(float),
150 benchmark::Counter::kIsRate);
151}
152
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700153
Frank Barchard7e955972019-10-11 10:34:25 -0700154#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700155 static void f32_dwconv_4x9__aarch64_neonfma(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700156 DWConvBenchmark(state,
157 xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
158 xnn_init_f32_minmax_scalar_params,
159 4 /* channel tile */, 9 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700160 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700161 static void f32_dwconv_4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700162 DWConvBenchmark(state,
163 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
164 xnn_init_f32_minmax_scalar_params,
165 4 /* channel tile */, 9 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700166 }
167
168 BENCHMARK_DWCONV(f32_dwconv_4x9__aarch64_neonfma)
169 BENCHMARK_DWCONV(f32_dwconv_4x9__aarch64_neonfma_cortex_a55)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700170#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700171
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700172
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700173#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard2b95ea12020-06-01 11:58:43 -0700174 static void f32_dwconv_4x4__neon_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700175 DWConvBenchmark(state,
176 xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2,
177 xnn_init_f32_minmax_scalar_params,
178 4 /* channel tile */, 4 /* primary tile */,
179 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700180 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700181 static void f32_dwconv_4x4__neon(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700182 DWConvBenchmark(state,
183 xnn_f32_dwconv_minmax_ukernel_up4x4__neon,
184 xnn_init_f32_minmax_scalar_params,
185 4 /* channel tile */, 4 /* primary tile */,
186 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700187 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700188 static void f32_dwconv_4x4__neonfma_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700189 DWConvBenchmark(state,
190 xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2,
191 xnn_init_f32_minmax_scalar_params,
192 4 /* channel tile */, 4 /* primary tile */,
193 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700194 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700195 static void f32_dwconv_4x4__neonfma(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700196 DWConvBenchmark(state,
197 xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma,
198 xnn_init_f32_minmax_scalar_params,
199 4 /* channel tile */, 4 /* primary tile */,
200 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700201 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700202 static void f32_dwconv_4x9__neon_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700203 DWConvBenchmark(state,
204 xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2,
205 xnn_init_f32_minmax_scalar_params,
206 4 /* channel tile */, 9 /* primary tile */,
207 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700208 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700209 static void f32_dwconv_4x9__neon(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700210 DWConvBenchmark(state,
211 xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
212 xnn_init_f32_minmax_scalar_params,
213 4 /* channel tile */, 9 /* primary tile */,
214 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700215 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700216 static void f32_dwconv_4x9__neonfma_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700217 DWConvBenchmark(state,
218 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2,
219 xnn_init_f32_minmax_scalar_params,
220 4 /* channel tile */, 9 /* primary tile */,
221 benchmark::utils::CheckNEONFMA);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700222 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700223 static void f32_dwconv_4x9__neonfma(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700224 DWConvBenchmark(state,
225 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
226 xnn_init_f32_minmax_scalar_params,
227 4 /* channel tile */, 9 /* primary tile */,
228 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700229 }
Frank Barchardc9f9d672021-10-18 12:51:59 -0700230 static void f32_dwconv_4x25__neon_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700231 DWConvBenchmark(state,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700232 xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700233 xnn_init_f32_minmax_scalar_params,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700234 4 /* channel tile */, 25 /* primary tile */,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700235 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700236 }
Frank Barchardc9f9d672021-10-18 12:51:59 -0700237 static void f32_dwconv_4x25__neon(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700238 DWConvBenchmark(state,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700239 xnn_f32_dwconv_minmax_ukernel_up4x25__neon,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700240 xnn_init_f32_minmax_scalar_params,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700241 4 /* channel tile */, 25 /* primary tile */,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700242 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700243 }
Frank Barchardc9f9d672021-10-18 12:51:59 -0700244 static void f32_dwconv_4x25__neonfma_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700245 DWConvBenchmark(state,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700246 xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700247 xnn_init_f32_minmax_scalar_params,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700248 4 /* channel tile */, 25 /* primary tile */,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700249 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700250 }
Frank Barchardc9f9d672021-10-18 12:51:59 -0700251 static void f32_dwconv_4x25__neonfma(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700252 DWConvBenchmark(state,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700253 xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700254 xnn_init_f32_minmax_scalar_params,
Frank Barchardc9f9d672021-10-18 12:51:59 -0700255 4 /* channel tile */, 25 /* primary tile */,
Marat Dukhan42b441b2021-07-26 17:40:52 -0700256 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700257 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700258 static void f32_dwconv_8x4__neon_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700259 DWConvBenchmark(state,
260 xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2,
261 xnn_init_f32_minmax_scalar_params,
262 8 /* channel tile */, 4 /* primary tile */,
263 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700264 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700265 static void f32_dwconv_8x4__neon(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700266 DWConvBenchmark(state,
267 xnn_f32_dwconv_minmax_ukernel_up8x4__neon,
268 xnn_init_f32_minmax_scalar_params,
269 8 /* channel tile */, 4 /* primary tile */,
270 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700271 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700272 static void f32_dwconv_8x4__neonfma_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700273 DWConvBenchmark(state,
274 xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2,
275 xnn_init_f32_minmax_scalar_params,
276 8 /* channel tile */, 4 /* primary tile */,
277 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700278 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700279 static void f32_dwconv_8x4__neonfma(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700280 DWConvBenchmark(state,
281 xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma,
282 xnn_init_f32_minmax_scalar_params,
283 8 /* channel tile */, 4 /* primary tile */,
284 benchmark::utils::CheckNEONFMA);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700285 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700286 static void f32_dwconv_8x9__neon_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700287 DWConvBenchmark(state,
288 xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2,
289 xnn_init_f32_minmax_scalar_params,
290 8 /* channel tile */, 9 /* primary tile */,
291 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700292 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700293 static void f32_dwconv_8x9__neon(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700294 DWConvBenchmark(state,
295 xnn_f32_dwconv_minmax_ukernel_up8x9__neon,
296 xnn_init_f32_minmax_scalar_params,
297 8 /* channel tile */, 9 /* primary tile */,
298 benchmark::utils::CheckNEON);
Frank Barchard2b95ea12020-06-01 11:58:43 -0700299 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700300 static void f32_dwconv_8x9__neonfma_acc2(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700301 DWConvBenchmark(state,
302 xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2,
303 xnn_init_f32_minmax_scalar_params,
304 8 /* channel tile */, 9 /* primary tile */,
305 benchmark::utils::CheckNEONFMA);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700306 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700307 static void f32_dwconv_8x9__neonfma(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700308 DWConvBenchmark(state,
309 xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma,
310 xnn_init_f32_minmax_scalar_params,
311 8 /* channel tile */, 9 /* primary tile */,
312 benchmark::utils::CheckNEONFMA);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700313 }
Frank Barchardc9f9d672021-10-18 12:51:59 -0700314 static void f32_dwconv_8x25__neon_acc2(benchmark::State& state, const char* net) {
315 DWConvBenchmark(state,
316 xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2,
317 xnn_init_f32_minmax_scalar_params,
318 8 /* channel tile */, 25 /* primary tile */,
319 benchmark::utils::CheckNEON);
320 }
321 static void f32_dwconv_8x25__neon(benchmark::State& state, const char* net) {
322 DWConvBenchmark(state,
323 xnn_f32_dwconv_minmax_ukernel_up8x25__neon,
324 xnn_init_f32_minmax_scalar_params,
325 8 /* channel tile */, 25 /* primary tile */,
326 benchmark::utils::CheckNEON);
327 }
328 static void f32_dwconv_8x25__neonfma_acc2(benchmark::State& state, const char* net) {
329 DWConvBenchmark(state,
330 xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2,
331 xnn_init_f32_minmax_scalar_params,
332 8 /* channel tile */, 25 /* primary tile */,
333 benchmark::utils::CheckNEONFMA);
334 }
335 static void f32_dwconv_8x25__neonfma(benchmark::State& state, const char* net) {
336 DWConvBenchmark(state,
337 xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma,
338 xnn_init_f32_minmax_scalar_params,
339 8 /* channel tile */, 25 /* primary tile */,
340 benchmark::utils::CheckNEONFMA);
341 }
342 static void f32_dwconv_16x4__neon_acc2(benchmark::State& state, const char* net) {
343 DWConvBenchmark(state,
344 xnn_f32_dwconv_minmax_ukernel_up16x4__neon_acc2,
345 xnn_init_f32_minmax_scalar_params,
346 16 /* channel tile */, 4 /* primary tile */,
347 benchmark::utils::CheckNEON);
348 }
349 static void f32_dwconv_16x4__neon(benchmark::State& state, const char* net) {
350 DWConvBenchmark(state,
351 xnn_f32_dwconv_minmax_ukernel_up16x4__neon,
352 xnn_init_f32_minmax_scalar_params,
353 16 /* channel tile */, 4 /* primary tile */,
354 benchmark::utils::CheckNEON);
355 }
356 static void f32_dwconv_16x4__neonfma_acc2(benchmark::State& state, const char* net) {
357 DWConvBenchmark(state,
358 xnn_f32_dwconv_minmax_ukernel_up16x4__neonfma_acc2,
359 xnn_init_f32_minmax_scalar_params,
360 16 /* channel tile */, 4 /* primary tile */,
361 benchmark::utils::CheckNEONFMA);
362 }
363 static void f32_dwconv_16x4__neonfma(benchmark::State& state, const char* net) {
364 DWConvBenchmark(state,
365 xnn_f32_dwconv_minmax_ukernel_up16x4__neonfma,
366 xnn_init_f32_minmax_scalar_params,
367 16 /* channel tile */, 4 /* primary tile */,
368 benchmark::utils::CheckNEONFMA);
369 }
370 static void f32_dwconv_16x9__neon_acc2(benchmark::State& state, const char* net) {
371 DWConvBenchmark(state,
372 xnn_f32_dwconv_minmax_ukernel_up16x9__neon_acc2,
373 xnn_init_f32_minmax_scalar_params,
374 16 /* channel tile */, 9 /* primary tile */,
375 benchmark::utils::CheckNEON);
376 }
377 static void f32_dwconv_16x9__neon(benchmark::State& state, const char* net) {
378 DWConvBenchmark(state,
379 xnn_f32_dwconv_minmax_ukernel_up16x9__neon,
380 xnn_init_f32_minmax_scalar_params,
381 16 /* channel tile */, 9 /* primary tile */,
382 benchmark::utils::CheckNEON);
383 }
384 static void f32_dwconv_16x9__neonfma_acc2(benchmark::State& state, const char* net) {
385 DWConvBenchmark(state,
386 xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma_acc2,
387 xnn_init_f32_minmax_scalar_params,
388 16 /* channel tile */, 9 /* primary tile */,
389 benchmark::utils::CheckNEONFMA);
390 }
391 static void f32_dwconv_16x9__neonfma(benchmark::State& state, const char* net) {
392 DWConvBenchmark(state,
393 xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma,
394 xnn_init_f32_minmax_scalar_params,
395 16 /* channel tile */, 9 /* primary tile */,
396 benchmark::utils::CheckNEONFMA);
397 }
398 static void f32_dwconv_16x25__neon_acc2(benchmark::State& state, const char* net) {
399 DWConvBenchmark(state,
400 xnn_f32_dwconv_minmax_ukernel_up16x25__neon_acc2,
401 xnn_init_f32_minmax_scalar_params,
402 16 /* channel tile */, 25 /* primary tile */,
403 benchmark::utils::CheckNEON);
404 }
405 static void f32_dwconv_16x25__neon(benchmark::State& state, const char* net) {
406 DWConvBenchmark(state,
407 xnn_f32_dwconv_minmax_ukernel_up16x25__neon,
408 xnn_init_f32_minmax_scalar_params,
409 16 /* channel tile */, 25 /* primary tile */,
410 benchmark::utils::CheckNEON);
411 }
412 static void f32_dwconv_16x25__neonfma_acc2(benchmark::State& state, const char* net) {
413 DWConvBenchmark(state,
414 xnn_f32_dwconv_minmax_ukernel_up16x25__neonfma_acc2,
415 xnn_init_f32_minmax_scalar_params,
416 16 /* channel tile */, 25 /* primary tile */,
417 benchmark::utils::CheckNEONFMA);
418 }
419 static void f32_dwconv_16x25__neonfma(benchmark::State& state, const char* net) {
420 DWConvBenchmark(state,
421 xnn_f32_dwconv_minmax_ukernel_up16x25__neonfma,
422 xnn_init_f32_minmax_scalar_params,
423 16 /* channel tile */, 25 /* primary tile */,
424 benchmark::utils::CheckNEONFMA);
425 }
Frank Barchard2b95ea12020-06-01 11:58:43 -0700426 BENCHMARK_DWCONV(f32_dwconv_4x4__neonfma)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700427 BENCHMARK_DWCONV(f32_dwconv_4x4__neonfma_acc2)
Frank Barchard2b95ea12020-06-01 11:58:43 -0700428 BENCHMARK_DWCONV(f32_dwconv_8x4__neonfma)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700429 BENCHMARK_DWCONV(f32_dwconv_8x4__neonfma_acc2)
Frank Barchardc9f9d672021-10-18 12:51:59 -0700430 BENCHMARK_DWCONV(f32_dwconv_16x4__neonfma)
431 BENCHMARK_DWCONV(f32_dwconv_16x4__neonfma_acc2)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700432
433 BENCHMARK_DWCONV(f32_dwconv_4x9__neonfma)
434 BENCHMARK_DWCONV(f32_dwconv_4x9__neonfma_acc2)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700435 BENCHMARK_DWCONV(f32_dwconv_8x9__neonfma)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700436 BENCHMARK_DWCONV(f32_dwconv_8x9__neonfma_acc2)
Frank Barchardc9f9d672021-10-18 12:51:59 -0700437 BENCHMARK_DWCONV(f32_dwconv_16x9__neonfma)
438 BENCHMARK_DWCONV(f32_dwconv_16x9__neonfma_acc2)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700439
440 BENCHMARK_DWCONV(f32_dwconv_4x25__neonfma)
441 BENCHMARK_DWCONV(f32_dwconv_4x25__neonfma_acc2)
442 BENCHMARK_DWCONV(f32_dwconv_8x25__neonfma)
443 BENCHMARK_DWCONV(f32_dwconv_8x25__neonfma_acc2)
Frank Barchardc9f9d672021-10-18 12:51:59 -0700444 BENCHMARK_DWCONV(f32_dwconv_16x25__neonfma)
445 BENCHMARK_DWCONV(f32_dwconv_16x25__neonfma_acc2)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700446
447 BENCHMARK_DWCONV(f32_dwconv_4x4__neon)
448 BENCHMARK_DWCONV(f32_dwconv_4x4__neon_acc2)
449 BENCHMARK_DWCONV(f32_dwconv_8x4__neon)
450 BENCHMARK_DWCONV(f32_dwconv_8x4__neon_acc2)
Frank Barchardc9f9d672021-10-18 12:51:59 -0700451 BENCHMARK_DWCONV(f32_dwconv_16x4__neon)
452 BENCHMARK_DWCONV(f32_dwconv_16x4__neon_acc2)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700453
454 BENCHMARK_DWCONV(f32_dwconv_4x9__neon)
455 BENCHMARK_DWCONV(f32_dwconv_4x9__neon_acc2)
456 BENCHMARK_DWCONV(f32_dwconv_8x9__neon)
457 BENCHMARK_DWCONV(f32_dwconv_8x9__neon_acc2)
Frank Barchardc9f9d672021-10-18 12:51:59 -0700458 BENCHMARK_DWCONV(f32_dwconv_16x9__neon)
459 BENCHMARK_DWCONV(f32_dwconv_16x9__neon_acc2)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700460
461 BENCHMARK_DWCONV(f32_dwconv_4x25__neon)
462 BENCHMARK_DWCONV(f32_dwconv_4x25__neon_acc2)
463 BENCHMARK_DWCONV(f32_dwconv_8x25__neon)
464 BENCHMARK_DWCONV(f32_dwconv_8x25__neon_acc2)
Frank Barchardc9f9d672021-10-18 12:51:59 -0700465 BENCHMARK_DWCONV(f32_dwconv_16x25__neon)
466 BENCHMARK_DWCONV(f32_dwconv_16x25__neon_acc2)
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700467#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700468
469
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700470#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700471 static void f32_dwconv_4x4__sse(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700472 DWConvBenchmark(state,
473 xnn_f32_dwconv_minmax_ukernel_up4x4__sse,
474 xnn_init_f32_minmax_sse_params,
475 4 /* channel tile */, 4 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700476 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700477 static void f32_dwconv_4x9__sse(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700478 DWConvBenchmark(state,
479 xnn_f32_dwconv_minmax_ukernel_up4x9__sse,
480 xnn_init_f32_minmax_sse_params,
481 4 /* channel tile */, 9 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700482 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700483 static void f32_dwconv_4x25__sse(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700484 DWConvBenchmark(state,
485 xnn_f32_dwconv_minmax_ukernel_up4x25__sse,
486 xnn_init_f32_minmax_sse_params,
487 4 /* channel tile */, 25 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700488 }
489
490 BENCHMARK_DWCONV(f32_dwconv_4x4__sse)
491 BENCHMARK_DWCONV(f32_dwconv_4x9__sse)
492 BENCHMARK_DWCONV(f32_dwconv_4x25__sse)
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700493#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700494
495
XNNPACK Teamb455b122019-09-27 18:10:33 -0700496static void f32_dwconv_1x4__scalar(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700497 DWConvBenchmark(state,
498 xnn_f32_dwconv_minmax_ukernel_up1x4__scalar,
499 xnn_init_f32_minmax_scalar_params,
500 1 /* channel tile */, 4 /* primary tile */);
501}
502static void f32_dwconv_1x4__scalar_acc2(benchmark::State& state, const char* net) {
503 DWConvBenchmark(state,
504 xnn_f32_dwconv_minmax_ukernel_up1x4__scalar_acc2,
505 xnn_init_f32_minmax_scalar_params,
506 1 /* channel tile */, 4 /* primary tile */);
507}
508static void f32_dwconv_2x4__scalar(benchmark::State& state, const char* net) {
509 DWConvBenchmark(state,
510 xnn_f32_dwconv_minmax_ukernel_up2x4__scalar,
511 xnn_init_f32_minmax_scalar_params,
512 2 /* channel tile */, 4 /* primary tile */);
513}
514static void f32_dwconv_2x4__scalar_acc2(benchmark::State& state, const char* net) {
515 DWConvBenchmark(state,
516 xnn_f32_dwconv_minmax_ukernel_up2x4__scalar_acc2,
517 xnn_init_f32_minmax_scalar_params,
518 2 /* channel tile */, 4 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700519}
XNNPACK Teamb455b122019-09-27 18:10:33 -0700520static void f32_dwconv_1x9__scalar(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700521 DWConvBenchmark(state,
522 xnn_f32_dwconv_minmax_ukernel_up1x9__scalar,
523 xnn_init_f32_minmax_scalar_params,
524 1 /* channel tile */, 9 /* primary tile */);
525}
526static void f32_dwconv_1x9__scalar_acc2(benchmark::State& state, const char* net) {
527 DWConvBenchmark(state,
528 xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2,
529 xnn_init_f32_minmax_scalar_params,
530 1 /* channel tile */, 9 /* primary tile */);
531}
532static void f32_dwconv_2x9__scalar(benchmark::State& state, const char* net) {
533 DWConvBenchmark(state,
534 xnn_f32_dwconv_minmax_ukernel_up2x9__scalar,
535 xnn_init_f32_minmax_scalar_params,
536 2 /* channel tile */, 9 /* primary tile */);
537}
538static void f32_dwconv_2x9__scalar_acc2(benchmark::State& state, const char* net) {
539 DWConvBenchmark(state,
540 xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2,
541 xnn_init_f32_minmax_scalar_params,
542 2 /* channel tile */, 9 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700543}
XNNPACK Teamb455b122019-09-27 18:10:33 -0700544static void f32_dwconv_1x25__scalar(benchmark::State& state, const char* net) {
Marat Dukhan42b441b2021-07-26 17:40:52 -0700545 DWConvBenchmark(state,
546 xnn_f32_dwconv_minmax_ukernel_up1x25__scalar,
547 xnn_init_f32_minmax_scalar_params,
548 1 /* channel tile */, 25 /* primary tile */);
549}
550static void f32_dwconv_1x25__scalar_acc2(benchmark::State& state, const char* net) {
551 DWConvBenchmark(state,
552 xnn_f32_dwconv_minmax_ukernel_up1x25__scalar_acc2,
553 xnn_init_f32_minmax_scalar_params,
554 1 /* channel tile */, 25 /* primary tile */);
555}
556static void f32_dwconv_2x25__scalar(benchmark::State& state, const char* net) {
557 DWConvBenchmark(state,
558 xnn_f32_dwconv_minmax_ukernel_up1x25__scalar,
559 xnn_init_f32_minmax_scalar_params,
560 2 /* channel tile */, 25 /* primary tile */);
561}
562static void f32_dwconv_2x25__scalar_acc2(benchmark::State& state, const char* net) {
563 DWConvBenchmark(state,
564 xnn_f32_dwconv_minmax_ukernel_up1x25__scalar_acc2,
565 xnn_init_f32_minmax_scalar_params,
566 2 /* channel tile */, 25 /* primary tile */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700567}
568
569BENCHMARK_DWCONV(f32_dwconv_1x4__scalar)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700570BENCHMARK_DWCONV(f32_dwconv_1x4__scalar_acc2)
571BENCHMARK_DWCONV(f32_dwconv_2x4__scalar)
572BENCHMARK_DWCONV(f32_dwconv_2x4__scalar_acc2)
573
XNNPACK Teamb455b122019-09-27 18:10:33 -0700574BENCHMARK_DWCONV(f32_dwconv_1x9__scalar)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700575BENCHMARK_DWCONV(f32_dwconv_1x9__scalar_acc2)
576BENCHMARK_DWCONV(f32_dwconv_2x9__scalar)
577BENCHMARK_DWCONV(f32_dwconv_2x9__scalar_acc2)
578
XNNPACK Teamb455b122019-09-27 18:10:33 -0700579BENCHMARK_DWCONV(f32_dwconv_1x25__scalar)
Marat Dukhan42b441b2021-07-26 17:40:52 -0700580BENCHMARK_DWCONV(f32_dwconv_1x25__scalar_acc2)
581BENCHMARK_DWCONV(f32_dwconv_2x25__scalar)
582BENCHMARK_DWCONV(f32_dwconv_2x25__scalar_acc2)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700583
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700584
XNNPACK Teamb455b122019-09-27 18:10:33 -0700585#ifndef XNNPACK_BENCHMARK_NO_MAIN
586BENCHMARK_MAIN();
587#endif