blob: e3244448592a9496756aba6f54637d3dcd950978 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <cpuinfo.h>

#include <benchmark/benchmark.h>
#include "bench/dwconv.h"
#include "bench/utils.h"
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/indirection.h>
#include <xnnpack/operator.h>
#include <xnnpack/pack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070026
27
28static void DWConvCHWBenchmark(benchmark::State& state,
Marat Dukhan1f29b802020-05-15 23:46:39 -070029 xnn_f32_dwconv_chw_ukernel_function dwconv,
XNNPACK Teamb455b122019-09-27 18:10:33 -070030 uint32_t it, uint32_t ot, uint32_t kh, uint32_t kw, uint32_t pw, uint32_t s)
31{
32 if (!cpuinfo_initialize()) {
33 state.SkipWithError("cpuinfo initialization failed");
34 return;
35 }
36
37 const size_t input_height = state.range(0);
38 const size_t input_width = state.range(1);
39 const size_t kernel_height = state.range(2);
40 const size_t kernel_width = state.range(3);
41 const size_t padding_height = state.range(4);
42 const size_t padding_width = state.range(5);
43 const size_t subsampling = state.range(6);
44 const size_t dilation = state.range(7);
45 const size_t channels = state.range(8);
46
47 if (kernel_height != kh) {
48 state.SkipWithError("kernel height mismatch");
49 return;
50 }
51
52 if (kernel_width != kw) {
53 state.SkipWithError("kernel width mismatch");
54 return;
55 }
56
57 if (subsampling != s) {
58 state.SkipWithError("subsampling mismatch");
59 return;
60 }
61
62 if (padding_width % 2 != 0 || padding_width / 2 != pw) {
63 state.SkipWithError("padding width mismatch");
64 return;
65 }
66
67 if (dilation != 1) {
68 state.SkipWithError("unsupported dilation");
69 return;
70 }
71
72 std::random_device random_device;
73 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070074 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070075
76 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
77 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
78 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
79 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
80
81 const size_t inputSize = (input_height + padding_height) * input_width;
82 const size_t kernel_size = kernel_height * kernel_width;
83 const size_t output_size = output_height * output_width;
84
85 std::vector<float> input(inputSize * channels + 2 * it);
86 std::generate(input.begin(), input.end(), std::ref(f32rng));
87 std::vector<float> bias(channels);
88 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
89 std::vector<float> kernel(channels * kernel_size);
90 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
Erich Elsen4e5db3d2020-05-07 08:57:47 -070091 std::vector<float> zero(input_width + padding_width);
XNNPACK Teamb455b122019-09-27 18:10:33 -070092
93 const size_t w_elements = (kernel_size + 1) * channels;
94 const size_t o_elements = output_size * channels;
95 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070096 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070097 sizeof(float) * (w_elements + o_elements));
98
99 std::vector<float, AlignedAllocator<float, 32>> packed_weights(w_elements * num_buffers);
100 std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
101 for (size_t c = 0; c < channels; c++) {
102 packed_weights[c * kernel_size + c] = bias[c];
103 for (size_t i = 0; i < kernel_size; i++) {
104 packed_weights[c * kernel_size + c + 1 + i] = kernel[c * kernel_size + i];
105 }
106 }
107 for (size_t n = 1; n < num_buffers; n++) {
108 std::copy(packed_weights.cbegin(), packed_weights.cbegin() + w_elements, packed_weights.begin() + n * w_elements);
109 }
110
111 std::vector<float> output(o_elements * num_buffers);
112 std::fill(output.begin(), output.end(), std::nanf(""));
113
Marat Dukhan1f29b802020-05-15 23:46:39 -0700114 xnn_f32_chw_params chw_params =
115 xnn_init_f32_chw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700116
117 size_t buffer_index = 0;
118 for (auto _ : state) {
119 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700120 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700121 buffer_index = (buffer_index + 1) % num_buffers;
122 state.ResumeTiming();
123
124 for (uint32_t channel = 0; channel < channels; channel++) {
125 dwconv(
Erich Elseneda9c112020-05-11 04:40:25 -0700126 input_height, input_width,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700127 input.data() + channel * inputSize,
128 packed_weights.data() + channel * (kernel_size + 1) + buffer_index * w_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700129 zero.data(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700130 output.data() + channel * output_size + buffer_index * o_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700131 padding_height / 2, // padding_top
XNNPACK Teamb455b122019-09-27 18:10:33 -0700132 it * sizeof(float), ot * sizeof(float),
133 input_width * sizeof(float), output_width * sizeof(float),
Marat Dukhan1f29b802020-05-15 23:46:39 -0700134 &chw_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700135 }
136 }
137
138 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
139 state.counters["FLOPS"] = benchmark::Counter(
140 uint64_t(state.iterations()) * 2 * output_size * channels * kernel_size,
141 benchmark::Counter::kIsRate);
142
143 state.counters["BYTES"] = benchmark::Counter(
144 uint64_t(state.iterations()) * (output_size + inputSize + kernel_size + 1 /* bias */) * channels * sizeof(float),
145 benchmark::Counter::kIsRate);
146}
147
148static void DWConvHWoTCTBenchmark(benchmark::State& state,
Marat Dukhan1f29b802020-05-15 23:46:39 -0700149 xnn_f32_dwconv_chw_ukernel_function dwconv,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700150 uint32_t it, uint32_t ot, uint32_t kh, uint32_t kw, uint32_t pw, uint32_t s)
151{
152 if (!cpuinfo_initialize()) {
153 state.SkipWithError("cpuinfo initialization failed");
154 return;
155 }
156
157 const size_t input_height = state.range(0);
158 const size_t input_width = state.range(1);
159 const size_t kernel_height = state.range(2);
160 const size_t kernel_width = state.range(3);
161 const size_t padding_height = state.range(4);
162 const size_t padding_width = state.range(5);
163 const size_t subsampling = state.range(6);
164 const size_t dilation = state.range(7);
165 const size_t channels = state.range(8);
166
167 if (kernel_height != kh) {
168 state.SkipWithError("kernel height mismatch");
169 return;
170 }
171
172 if (kernel_width != kw) {
173 state.SkipWithError("kernel width mismatch");
174 return;
175 }
176
177 if (subsampling != s) {
178 state.SkipWithError("subsampling mismatch");
179 return;
180 }
181
182 if (padding_width % 2 != 0 || padding_width / 2 != pw) {
183 state.SkipWithError("padding width mismatch");
184 return;
185 }
186
187 if (dilation != 1) {
188 state.SkipWithError("unsupported dilation");
189 return;
190 }
191
192 std::random_device random_device;
193 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700194 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700195
196 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
197 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
198 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
199 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
200
201 const size_t inputSize = (input_height + padding_height) * input_width;
202 const size_t kernel_size = kernel_height * kernel_width;
203 const size_t output_size = output_height * output_width;
204
Marat Dukhan42323232019-10-23 02:09:02 -0700205 std::vector<float> input(input_height * benchmark::utils::RoundUp<size_t>(input_width, it) * channels);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700206 std::generate(input.begin(), input.end(), std::ref(f32rng));
207 std::vector<float> bias(channels);
208 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
209 std::vector<float> kernel(channels * kernel_size);
210 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700211 std::vector<float> zero(input_width + padding_width);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700212
213 const size_t w_elements = (kernel_size + 1) * channels;
Marat Dukhan42323232019-10-23 02:09:02 -0700214 const size_t o_elements = output_height * benchmark::utils::RoundUp<size_t>(output_width, ot) * channels;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700215 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -0700216 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700217 sizeof(float) * (w_elements + o_elements));
218
219 std::vector<float, AlignedAllocator<float, 32>> packed_weights(w_elements * num_buffers);
220 std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
221 for (size_t c = 0; c < channels; c++) {
222 packed_weights[c * kernel_size + c] = bias[c];
223 for (size_t i = 0; i < kernel_size; i++) {
224 packed_weights[c * kernel_size + c + 1 + i] = kernel[c * kernel_size + i];
225 }
226 }
227 for (size_t n = 1; n < num_buffers; n++) {
228 std::copy(packed_weights.cbegin(), packed_weights.cbegin() + w_elements, packed_weights.begin() + n * w_elements);
229 }
230
231 std::vector<float> output(o_elements * num_buffers);
232 std::fill(output.begin(), output.end(), std::nanf(""));
233
Marat Dukhan1f29b802020-05-15 23:46:39 -0700234 xnn_f32_chw_params chw_params =
235 xnn_init_f32_chw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700236
237 size_t buffer_index = 0;
238 for (auto _ : state) {
239 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700240 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700241 buffer_index = (buffer_index + 1) % num_buffers;
242 state.ResumeTiming();
243
244 for (uint32_t channel = 0; channel < channels; channel++) {
245 dwconv(
Erich Elseneda9c112020-05-11 04:40:25 -0700246 input_height, input_width,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700247 input.data() + channel * it,
248 packed_weights.data() + channel * (kernel_size + 1) + buffer_index * w_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700249 zero.data(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700250 output.data() + channel * ot + buffer_index * o_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700251 padding_height / 2, // padding_top
XNNPACK Teamb455b122019-09-27 18:10:33 -0700252 it * channels * sizeof(float), ot * channels * sizeof(float),
Marat Dukhan42323232019-10-23 02:09:02 -0700253 benchmark::utils::RoundUp<size_t>(input_width, it) * channels * sizeof(float),
254 benchmark::utils::RoundUp<size_t>(output_width, ot) * channels * sizeof(float),
Marat Dukhan1f29b802020-05-15 23:46:39 -0700255 &chw_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700256 }
257 }
258
259 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
260 state.counters["FLOPS"] = benchmark::Counter(
261 uint64_t(state.iterations()) * 2 * output_size * channels * kernel_size,
262 benchmark::Counter::kIsRate);
263
264 state.counters["BYTES"] = benchmark::Counter(
265 uint64_t(state.iterations()) * (output_size + inputSize + kernel_size + 1 /* bias */) * channels * sizeof(float),
266 benchmark::Counter::kIsRate);
267}
268
#if XNN_ARCH_ARM64
  // NEON+FMA micro-kernels driven through DWConvCHWBenchmark.
  // Trailing arguments are, in order: it, ot, kh, kw, pw, s.
  static void CHW_3x3p1__neonfma(benchmark::State& state, const char* net) {
    DWConvCHWBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }

  static void CHW_5x5p2__neonfma(benchmark::State& state, const char* net) {
    DWConvCHWBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }

  static void CHW_3x3s2p1__neonfma(benchmark::State& state, const char* net) {
    DWConvCHWBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }

  static void CHW_5x5s2p2__neonfma(benchmark::State& state, const char* net) {
    DWConvCHWBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }

  // The same NEON+FMA micro-kernels driven through DWConvHWoTCTBenchmark.
  static void HWo4C4_3x3p1__neonfma(benchmark::State& state, const char* net) {
    DWConvHWoTCTBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }

  static void HWo4C4_5x5p2__neonfma(benchmark::State& state, const char* net) {
    DWConvHWoTCTBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }

  static void HWo4C4_3x3s2p1__neonfma(benchmark::State& state, const char* net) {
    DWConvHWoTCTBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }

  static void HWo4C4_5x5s2p2__neonfma(benchmark::State& state, const char* net) {
    DWConvHWoTCTBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma,
      /*it=*/4, /*ot=*/4, /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }

  // Register every ARM64 variant with the benchmark framework.
  BENCHMARK_DWCONV(CHW_3x3p1__neonfma)
  BENCHMARK_DWCONV(CHW_5x5p2__neonfma)
  BENCHMARK_DWCONV(CHW_3x3s2p1__neonfma)
  BENCHMARK_DWCONV(CHW_5x5s2p2__neonfma)
  BENCHMARK_DWCONV(HWo4C4_3x3p1__neonfma)
  BENCHMARK_DWCONV(HWo4C4_5x5p2__neonfma)
  BENCHMARK_DWCONV(HWo4C4_3x3s2p1__neonfma)
  BENCHMARK_DWCONV(HWo4C4_5x5s2p2__neonfma)
#endif  // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700311
312
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // SSE micro-kernels driven through DWConvCHWBenchmark.
  // Trailing arguments are, in order: it, ot, kh, kw, pw, s.
  static void CHW_3x3p1__sse(benchmark::State& state, const char* net) {
    DWConvCHWBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3p1__sse,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }

  static void CHW_3x3s2p1__sse(benchmark::State& state, const char* net) {
    DWConvCHWBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }

  // The same SSE micro-kernels driven through DWConvHWoTCTBenchmark.
  static void HWo4C4_3x3p1__sse(benchmark::State& state, const char* net) {
    DWConvHWoTCTBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3p1__sse,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }

  static void HWo4C4_3x3s2p1__sse(benchmark::State& state, const char* net) {
    DWConvHWoTCTBenchmark(
      state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse,
      /*it=*/4, /*ot=*/4, /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }

  // Register every x86 variant with the benchmark framework.
  BENCHMARK_DWCONV(CHW_3x3p1__sse)
  BENCHMARK_DWCONV(CHW_3x3s2p1__sse)
  BENCHMARK_DWCONV(HWo4C4_3x3p1__sse)
  BENCHMARK_DWCONV(HWo4C4_3x3s2p1__sse)
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700335
Marat Dukhana199d492020-07-24 15:01:25 -0700336#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Erich Elsene6214af2020-06-10 22:17:22 -0700337 static void CHW_3x3p1__psimd(benchmark::State& state, const char* net) {
338 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__psimd, 1, 1, 3, 3, 1, 1);
339 }
Erich Elsenfd7a6e32020-06-11 12:04:44 -0700340 static void CHW_3x3s2p1__psimd(benchmark::State& state, const char* net) {
341 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__psimd, 4, 4, 3, 3, 1, 2);
342 }
Erich Elsen28928892020-06-12 08:08:19 -0700343 static void CHW_5x5p2__psimd(benchmark::State& state, const char* net) {
344 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__psimd, 4, 4, 5, 5, 2, 1);
345 }
Erich Elsen7465a892020-06-13 14:02:04 -0700346 static void CHW_5x5s2p2__psimd(benchmark::State& state, const char* net) {
347 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__psimd, 4, 4, 5, 5, 2, 2);
348 }
349
Erich Elsen28928892020-06-12 08:08:19 -0700350
351
Erich Elsene6214af2020-06-10 22:17:22 -0700352 BENCHMARK_DWCONV(CHW_3x3p1__psimd)
Erich Elsenfd7a6e32020-06-11 12:04:44 -0700353 BENCHMARK_DWCONV(CHW_3x3s2p1__psimd)
Erich Elsen28928892020-06-12 08:08:19 -0700354 BENCHMARK_DWCONV(CHW_5x5p2__psimd)
Erich Elsen7465a892020-06-13 14:02:04 -0700355 BENCHMARK_DWCONV(CHW_5x5s2p2__psimd)
Marat Dukhana199d492020-07-24 15:01:25 -0700356#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Erich Elsene6214af2020-06-10 22:17:22 -0700357
Erich Elsen0cc2c532019-10-15 04:44:18 -0700358 static void CHW_3x3p1__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700359 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, 1, 1, 3, 3, 1, 1);
Erich Elsen0cc2c532019-10-15 04:44:18 -0700360 }
361
Erich Elsen38709a62019-11-08 11:58:45 -0800362 static void CHW_5x5p2__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700363 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, 1, 1, 5, 5, 2, 1);
Erich Elsen38709a62019-11-08 11:58:45 -0800364 }
365
Erich Elsenac4de802019-10-16 04:35:30 -0700366 static void CHW_3x3s2p1__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700367 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, 1, 1, 3, 3, 1, 2);
Erich Elsenac4de802019-10-16 04:35:30 -0700368 }
369
Erich Elsen38709a62019-11-08 11:58:45 -0800370 static void CHW_5x5s2p2__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700371 DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, 1, 1, 5, 5, 2, 2);
Erich Elsen38709a62019-11-08 11:58:45 -0800372 }
373
Erich Elsenac4de802019-10-16 04:35:30 -0700374 static void HWC_3x3p1__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700375 DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, 1, 1, 3, 3, 1, 1);
Erich Elsen0cc2c532019-10-15 04:44:18 -0700376 }
377
Erich Elsen38709a62019-11-08 11:58:45 -0800378 static void HWC_5x5p2__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700379 DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, 1, 1, 5, 5, 2, 1);
Erich Elsen38709a62019-11-08 11:58:45 -0800380 }
381
Erich Elsenac4de802019-10-16 04:35:30 -0700382 static void HWC_3x3s2p1__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700383 DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, 1, 1, 3, 3, 1, 2);
Erich Elsenac4de802019-10-16 04:35:30 -0700384 }
385
Erich Elsen38709a62019-11-08 11:58:45 -0800386 static void HWC_5x5s2p2__scalar(benchmark::State& state, const char* net) {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700387 DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, 1, 1, 5, 5, 2, 2);
Erich Elsen38709a62019-11-08 11:58:45 -0800388 }
389
Erich Elsen0cc2c532019-10-15 04:44:18 -0700390
391 BENCHMARK_DWCONV(CHW_3x3p1__scalar)
Erich Elsen38709a62019-11-08 11:58:45 -0800392 BENCHMARK_DWCONV(CHW_5x5p2__scalar)
Erich Elsenac4de802019-10-16 04:35:30 -0700393 BENCHMARK_DWCONV(CHW_3x3s2p1__scalar)
Erich Elsen38709a62019-11-08 11:58:45 -0800394 BENCHMARK_DWCONV(CHW_5x5s2p2__scalar)
Erich Elsenac4de802019-10-16 04:35:30 -0700395 BENCHMARK_DWCONV(HWC_3x3p1__scalar)
Erich Elsen38709a62019-11-08 11:58:45 -0800396 BENCHMARK_DWCONV(HWC_5x5p2__scalar)
Erich Elsenac4de802019-10-16 04:35:30 -0700397 BENCHMARK_DWCONV(HWC_3x3s2p1__scalar)
Erich Elsen38709a62019-11-08 11:58:45 -0800398 BENCHMARK_DWCONV(HWC_5x5s2p2__scalar)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700399
400#ifndef XNNPACK_BENCHMARK_NO_MAIN
401BENCHMARK_MAIN();
402#endif