// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <cpuinfo.h>

#include <benchmark/benchmark.h>
#include "bench/dwconv.h"
#include "bench/utils.h"
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/indirection.h>
#include <xnnpack/operator.h>
#include <xnnpack/pack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


Marat Dukhanbf715f92020-10-23 20:17:00 -070028static void DWConv2DBenchmark(benchmark::State& state,
29 xnn_f32_dwconv2d_chw_ukernel_function dwconv,
Marat Dukhan98f2eeb2020-10-23 23:13:41 -070030 uint32_t kh, uint32_t kw, uint32_t pw, uint32_t s,
31 benchmark::utils::IsaCheckFunction isa_check = nullptr)
XNNPACK Teamb455b122019-09-27 18:10:33 -070032{
33 if (!cpuinfo_initialize()) {
34 state.SkipWithError("cpuinfo initialization failed");
35 return;
36 }
Marat Dukhan98f2eeb2020-10-23 23:13:41 -070037 if (isa_check && !isa_check(state)) {
38 return;
39 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070040
41 const size_t input_height = state.range(0);
42 const size_t input_width = state.range(1);
43 const size_t kernel_height = state.range(2);
44 const size_t kernel_width = state.range(3);
45 const size_t padding_height = state.range(4);
46 const size_t padding_width = state.range(5);
47 const size_t subsampling = state.range(6);
48 const size_t dilation = state.range(7);
49 const size_t channels = state.range(8);
50
51 if (kernel_height != kh) {
52 state.SkipWithError("kernel height mismatch");
53 return;
54 }
55
56 if (kernel_width != kw) {
57 state.SkipWithError("kernel width mismatch");
58 return;
59 }
60
61 if (subsampling != s) {
62 state.SkipWithError("subsampling mismatch");
63 return;
64 }
65
66 if (padding_width % 2 != 0 || padding_width / 2 != pw) {
67 state.SkipWithError("padding width mismatch");
68 return;
69 }
70
71 if (dilation != 1) {
72 state.SkipWithError("unsupported dilation");
73 return;
74 }
75
76 std::random_device random_device;
77 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070078 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070079
80 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
81 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
82 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
83 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
84
85 const size_t inputSize = (input_height + padding_height) * input_width;
86 const size_t kernel_size = kernel_height * kernel_width;
87 const size_t output_size = output_height * output_width;
88
Marat Dukhanae7e8b22020-10-20 17:51:51 -070089 std::vector<float> input(inputSize * channels + 2 * XNN_EXTRA_BYTES);
XNNPACK Teamb455b122019-09-27 18:10:33 -070090 std::generate(input.begin(), input.end(), std::ref(f32rng));
91 std::vector<float> bias(channels);
92 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
93 std::vector<float> kernel(channels * kernel_size);
94 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
Erich Elsen4e5db3d2020-05-07 08:57:47 -070095 std::vector<float> zero(input_width + padding_width);
XNNPACK Teamb455b122019-09-27 18:10:33 -070096
97 const size_t w_elements = (kernel_size + 1) * channels;
98 const size_t o_elements = output_size * channels;
99 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -0700100 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700101 sizeof(float) * (w_elements + o_elements));
102
103 std::vector<float, AlignedAllocator<float, 32>> packed_weights(w_elements * num_buffers);
104 std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
105 for (size_t c = 0; c < channels; c++) {
106 packed_weights[c * kernel_size + c] = bias[c];
107 for (size_t i = 0; i < kernel_size; i++) {
108 packed_weights[c * kernel_size + c + 1 + i] = kernel[c * kernel_size + i];
109 }
110 }
111 for (size_t n = 1; n < num_buffers; n++) {
112 std::copy(packed_weights.cbegin(), packed_weights.cbegin() + w_elements, packed_weights.begin() + n * w_elements);
113 }
114
115 std::vector<float> output(o_elements * num_buffers);
116 std::fill(output.begin(), output.end(), std::nanf(""));
117
Marat Dukhan1f29b802020-05-15 23:46:39 -0700118 xnn_f32_chw_params chw_params =
119 xnn_init_f32_chw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700120
121 size_t buffer_index = 0;
122 for (auto _ : state) {
123 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700124 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700125 buffer_index = (buffer_index + 1) % num_buffers;
126 state.ResumeTiming();
127
128 for (uint32_t channel = 0; channel < channels; channel++) {
129 dwconv(
Marat Dukhan75157772020-10-21 01:46:28 -0700130 input_height, input_width * sizeof(float),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700131 input.data() + channel * inputSize,
132 packed_weights.data() + channel * (kernel_size + 1) + buffer_index * w_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700133 zero.data(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700134 output.data() + channel * output_size + buffer_index * o_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700135 padding_height / 2, // padding_top
Marat Dukhan1f29b802020-05-15 23:46:39 -0700136 &chw_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700137 }
138 }
139
140 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
141 state.counters["FLOPS"] = benchmark::Counter(
142 uint64_t(state.iterations()) * 2 * output_size * channels * kernel_size,
143 benchmark::Counter::kIsRate);
144
145 state.counters["BYTES"] = benchmark::Counter(
146 uint64_t(state.iterations()) * (output_size + inputSize + kernel_size + 1 /* bias */) * channels * sizeof(float),
147 benchmark::Counter::kIsRate);
148}
149
#if XNN_ARCH_ARM
  // NEON micro-kernels: 3x3 kernel, subsampling 1, width padding 1
  // (kh=3, kw=3, pw=1, s=1). The `net` argument is not used by the wrappers.
  // NOTE(review): no isa_check is passed for these kernels, unlike the SSSE3
  // wrappers in this file - confirm NEON availability is guaranteed here.
  static void dwconv2d_chw_3x3p1__neon_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neon_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2, 3, 3, 1, 1);
  }

  // Register each wrapper through the BENCHMARK_DWCONV macro.
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_5x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_6x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_2x4_acc2)
#endif  // XNN_ARCH_ARM

#if XNN_ARCH_ARM64
  // NEONFMA micro-kernels. The trailing arguments of each DWConv2DBenchmark
  // call are (kh, kw, pw, s). The `net` argument is not used by the wrappers.

  // 3x3 kernel, subsampling 1, width padding 1.
  static void dwconv2d_chw_3x3p1__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2, 3, 3, 1, 1);
  }

  // 3x3 kernel, subsampling 2, width padding 1.
  static void dwconv2d_chw_3x3s2p1__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2, 3, 3, 1, 2);
  }

  // 5x5 kernel, subsampling 1, width padding 2.
  static void dwconv2d_chw_5x5p2__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc4, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc5(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc5, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_2x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_3x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2, 5, 5, 2, 1);
  }
  static void dwconv2d_chw_5x5p2__neonfma_4x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2, 5, 5, 2, 1);
  }

  // 5x5 kernel, subsampling 2, width padding 2.
  static void dwconv2d_chw_5x5s2p2__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2, 5, 5, 2, 2);
  }

  // Register each wrapper through the BENCHMARK_DWCONV macro.
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_5x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_6x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_2x4_acc2)

  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_2x4_acc2)

  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_5x4)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc5)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_2x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_2x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_3x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_4x4_acc2)

  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_1x4_acc2)
#endif  // XNN_ARCH_ARM64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // SSE and SSSE3 micro-kernels. The trailing numeric arguments of each
  // DWConv2DBenchmark call are (kh, kw, pw, s). The `net` argument is not
  // used by the wrappers.

  // SSE: 3x3 kernel, subsampling 1, width padding 1.
  static void dwconv2d_chw_3x3p1__sse_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4, 3, 3, 1, 1);
  }
  static void dwconv2d_chw_3x3p1__sse_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2, 3, 3, 1, 1);
  }

  // SSSE3: 3x3 kernel, subsampling 1, width padding 1. CheckSSSE3 is passed
  // as the isa_check predicate of DWConv2DBenchmark.
  static void dwconv2d_chw_3x3p1__ssse3_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }
  static void dwconv2d_chw_3x3p1__ssse3_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2, 3, 3, 1, 1, benchmark::utils::CheckSSSE3);
  }

  // SSE: 3x3 kernel, subsampling 2, width padding 1.
  static void dwconv2d_chw_3x3s2p1__sse_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4, 3, 3, 1, 2);
  }
  static void dwconv2d_chw_3x3s2p1__sse_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2, 3, 3, 1, 2);
  }

  // Register each wrapper through the BENCHMARK_DWCONV macro.
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_5x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_6x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_2x4_acc2)

  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_5x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_6x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_2x4_acc2)

  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_2x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_3x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_4x4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4_acc2)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4_acc3)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4_acc4)
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_2x4_acc2)
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

Marat Dukhana199d492020-07-24 15:01:25 -0700452#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanbf715f92020-10-23 20:17:00 -0700453 static void dwconv2d_chw_3x3p1__psimd_1x4_acc3(benchmark::State& state, const char* net) {
454 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, 3, 3, 1, 1);
Erich Elsene6214af2020-06-10 22:17:22 -0700455 }
Marat Dukhanbf715f92020-10-23 20:17:00 -0700456 static void dwconv2d_chw_3x3s2p1__psimd_1x4_acc3(benchmark::State& state, const char* net) {
457 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, 3, 3, 1, 2);
Erich Elsenfd7a6e32020-06-11 12:04:44 -0700458 }
Marat Dukhanbf715f92020-10-23 20:17:00 -0700459 static void dwconv2d_chw_5x5p2__psimd_3x4(benchmark::State& state, const char* net) {
460 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, 5, 5, 2, 1);
Erich Elsen28928892020-06-12 08:08:19 -0700461 }
Marat Dukhanbf715f92020-10-23 20:17:00 -0700462 static void dwconv2d_chw_5x5s2p2__psimd_1x4_acc2(benchmark::State& state, const char* net) {
463 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, 5, 5, 2, 2);
Erich Elsen7465a892020-06-13 14:02:04 -0700464 }
465
Marat Dukhanbf715f92020-10-23 20:17:00 -0700466 BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__psimd_1x4_acc3)
467 BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__psimd_1x4_acc3)
468 BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__psimd_3x4)
469 BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__psimd_1x4_acc2)
Marat Dukhana199d492020-07-24 15:01:25 -0700470#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Erich Elsene6214af2020-06-10 22:17:22 -0700471
Marat Dukhan91249d22020-10-24 12:02:51 -0700472static void dwconv2d_chw_3x3p1__scalar_1x1(benchmark::State& state, const char* net) {
473 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1, 3, 3, 1, 1);
474}
475static void dwconv2d_chw_3x3p1__scalar_2x1(benchmark::State& state, const char* net) {
476 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, 3, 3, 1, 1);
477}
478static void dwconv2d_chw_3x3p1__scalar_3x1(benchmark::State& state, const char* net) {
479 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, 3, 3, 1, 1);
480}
481static void dwconv2d_chw_3x3p1__scalar_4x1(benchmark::State& state, const char* net) {
482 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, 3, 3, 1, 1);
483}
484static void dwconv2d_chw_3x3p1__scalar_5x1(benchmark::State& state, const char* net) {
485 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, 3, 3, 1, 1);
486}
487static void dwconv2d_chw_3x3p1__scalar_6x1(benchmark::State& state, const char* net) {
488 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, 3, 3, 1, 1);
489}
490static void dwconv2d_chw_3x3p1__scalar_1x1_acc2(benchmark::State& state, const char* net) {
491 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2, 3, 3, 1, 1);
492}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700493static void dwconv2d_chw_3x3p1__scalar_1x1_acc3(benchmark::State& state, const char* net) {
494 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3, 3, 3, 1, 1);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700495}
Marat Dukhan91249d22020-10-24 12:02:51 -0700496static void dwconv2d_chw_3x3p1__scalar_1x1_acc4(benchmark::State& state, const char* net) {
497 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4, 3, 3, 1, 1);
498}
499static void dwconv2d_chw_3x3p1__scalar_2x1_acc2(benchmark::State& state, const char* net) {
500 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, 3, 3, 1, 1);
501}
Erich Elsen0cc2c532019-10-15 04:44:18 -0700502
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700503static void dwconv2d_chw_3x3s2p1__scalar_1x1(benchmark::State& state, const char* net) {
504 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, 3, 3, 1, 2);
505}
506static void dwconv2d_chw_3x3s2p1__scalar_2x1(benchmark::State& state, const char* net) {
507 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, 3, 3, 1, 2);
508}
509static void dwconv2d_chw_3x3s2p1__scalar_3x1(benchmark::State& state, const char* net) {
510 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, 3, 3, 1, 2);
511}
512static void dwconv2d_chw_3x3s2p1__scalar_4x1(benchmark::State& state, const char* net) {
513 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, 3, 3, 1, 2);
514}
515static void dwconv2d_chw_3x3s2p1__scalar_1x1_acc2(benchmark::State& state, const char* net) {
516 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, 3, 3, 1, 2);
517}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700518static void dwconv2d_chw_3x3s2p1__scalar_1x1_acc3(benchmark::State& state, const char* net) {
519 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, 3, 3, 1, 2);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700520}
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700521static void dwconv2d_chw_3x3s2p1__scalar_1x1_acc4(benchmark::State& state, const char* net) {
522 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, 3, 3, 1, 2);
523}
524static void dwconv2d_chw_3x3s2p1__scalar_2x1_acc2(benchmark::State& state, const char* net) {
525 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, 3, 3, 1, 2);
526}
Erich Elsen38709a62019-11-08 11:58:45 -0800527
Marat Dukhanc4efb002020-10-25 23:14:47 -0700528static void dwconv2d_chw_5x5p2__scalar_1x1(benchmark::State& state, const char* net) {
529 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1, 5, 5, 2, 1);
530}
531static void dwconv2d_chw_5x5p2__scalar_2x1(benchmark::State& state, const char* net) {
532 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, 5, 5, 2, 1);
533}
534static void dwconv2d_chw_5x5p2__scalar_3x1(benchmark::State& state, const char* net) {
535 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, 5, 5, 2, 1);
536}
537static void dwconv2d_chw_5x5p2__scalar_1x1_acc2(benchmark::State& state, const char* net) {
538 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2, 5, 5, 2, 1);
539}
540static void dwconv2d_chw_5x5p2__scalar_1x1_acc3(benchmark::State& state, const char* net) {
541 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3, 5, 5, 2, 1);
542}
543static void dwconv2d_chw_5x5p2__scalar_1x1_acc4(benchmark::State& state, const char* net) {
544 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4, 5, 5, 2, 1);
545}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700546static void dwconv2d_chw_5x5p2__scalar_1x1_acc5(benchmark::State& state, const char* net) {
547 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5, 5, 5, 2, 1);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700548}
Marat Dukhanc4efb002020-10-25 23:14:47 -0700549static void dwconv2d_chw_5x5p2__scalar_2x1_acc2(benchmark::State& state, const char* net) {
550 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, 5, 5, 2, 1);
551}
552static void dwconv2d_chw_5x5p2__scalar_2x1_acc3(benchmark::State& state, const char* net) {
553 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, 5, 5, 2, 1);
554}
555static void dwconv2d_chw_5x5p2__scalar_3x1_acc2(benchmark::State& state, const char* net) {
556 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, 5, 5, 2, 1);
557}
Erich Elsenac4de802019-10-16 04:35:30 -0700558
Marat Dukhan29c0c332020-10-28 22:11:00 -0700559static void dwconv2d_chw_5x5s2p2__scalar_1x1(benchmark::State& state, const char* net) {
560 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1, 5, 5, 2, 2);
561}
562static void dwconv2d_chw_5x5s2p2__scalar_2x1(benchmark::State& state, const char* net) {
563 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1, 5, 5, 2, 2);
564}
565static void dwconv2d_chw_5x5s2p2__scalar_3x1(benchmark::State& state, const char* net) {
566 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1, 5, 5, 2, 2);
567}
568static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc2(benchmark::State& state, const char* net) {
569 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2, 5, 5, 2, 2);
570}
571static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc3(benchmark::State& state, const char* net) {
572 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3, 5, 5, 2, 2);
573}
574static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc4(benchmark::State& state, const char* net) {
575 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4, 5, 5, 2, 2);
576}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700577static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc5(benchmark::State& state, const char* net) {
578 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, 5, 5, 2, 2);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700579}
Marat Dukhan29c0c332020-10-28 22:11:00 -0700580static void dwconv2d_chw_5x5s2p2__scalar_2x1_acc2(benchmark::State& state, const char* net) {
581 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2, 5, 5, 2, 2);
582}
583static void dwconv2d_chw_5x5s2p2__scalar_2x1_acc3(benchmark::State& state, const char* net) {
584 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3, 5, 5, 2, 2);
585}
586static void dwconv2d_chw_5x5s2p2__scalar_3x1_acc2(benchmark::State& state, const char* net) {
587 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2, 5, 5, 2, 2);
588}
Erich Elsen38709a62019-11-08 11:58:45 -0800589
// Register the 3x3p1 scalar wrappers through BENCHMARK_DWCONV.
// NOTE(review): the macro is defined outside this chunk (presumably bench/dwconv.h) - confirm
// what argument sets it attaches to each wrapper.
Marat Dukhan91249d22020-10-24 12:02:51 -0700590BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1)
591BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_2x1)
592BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_3x1)
593BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_4x1)
594BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_5x1)
595BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_6x1)
596BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1_acc2)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700597BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1_acc3)
Marat Dukhan91249d22020-10-24 12:02:51 -0700598BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1_acc4)
599BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_2x1_acc2)
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700600
// Register the 3x3s2p1 scalar wrappers through BENCHMARK_DWCONV.
// NOTE(review): the macro is defined outside this chunk (presumably bench/dwconv.h) - confirm
// what argument sets it attaches to each wrapper.
601BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1)
602BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_2x1)
603BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_3x1)
604BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_4x1)
605BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1_acc2)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700606BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1_acc3)
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700607BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1_acc4)
608BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_2x1_acc2)
609
// Register the 5x5p2 scalar wrappers through BENCHMARK_DWCONV.
// NOTE(review): the macro is defined outside this chunk (presumably bench/dwconv.h) - confirm
// what argument sets it attaches to each wrapper.
Marat Dukhanc4efb002020-10-25 23:14:47 -0700610BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1)
611BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_2x1)
612BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_3x1)
613BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc2)
614BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc3)
615BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc4)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700616BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc5)
Marat Dukhanc4efb002020-10-25 23:14:47 -0700617BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_2x1_acc2)
618BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_2x1_acc3)
619BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_3x1_acc2)
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700620
// Register the 5x5s2p2 scalar wrappers through BENCHMARK_DWCONV.
// NOTE(review): the macro is defined outside this chunk (presumably bench/dwconv.h) - confirm
// what argument sets it attaches to each wrapper.
Marat Dukhan29c0c332020-10-28 22:11:00 -0700621BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1)
622BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_2x1)
623BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_3x1)
624BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc2)
625BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc3)
626BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc4)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700627BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc5)
Marat Dukhan29c0c332020-10-28 22:11:00 -0700628BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_2x1_acc2)
629BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_2x1_acc3)
630BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_3x1_acc2)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700631
// Expand Google Benchmark's BENCHMARK_MAIN() only when XNNPACK_BENCHMARK_NO_MAIN is not defined,
// so a build that defines the macro can omit it (presumably to supply its own main - confirm).
632#ifndef XNNPACK_BENCHMARK_NO_MAIN
633BENCHMARK_MAIN();
634#endif