blob: 505e988e38c69e24814aa343fcfcdb73f3ac5512 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <cmath>
9#include <functional>
10#include <random>
11#include <vector>
12
13#include <cpuinfo.h>
14
15#include <benchmark/benchmark.h>
16#include "bench/dwconv.h"
17#include "bench/utils.h"
18#include <xnnpack/AlignedAllocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070019#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070020#include <xnnpack/dwconv.h>
21#include <xnnpack/indirection.h>
22#include <xnnpack/operator.h>
23#include <xnnpack/pack.h>
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -070024#include <xnnpack/params-init.h>
25#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070026
27
Marat Dukhanbf715f92020-10-23 20:17:00 -070028static void DWConv2DBenchmark(benchmark::State& state,
29 xnn_f32_dwconv2d_chw_ukernel_function dwconv,
Marat Dukhan98f2eeb2020-10-23 23:13:41 -070030 uint32_t kh, uint32_t kw, uint32_t pw, uint32_t s,
31 benchmark::utils::IsaCheckFunction isa_check = nullptr)
XNNPACK Teamb455b122019-09-27 18:10:33 -070032{
33 if (!cpuinfo_initialize()) {
34 state.SkipWithError("cpuinfo initialization failed");
35 return;
36 }
Marat Dukhan98f2eeb2020-10-23 23:13:41 -070037 if (isa_check && !isa_check(state)) {
38 return;
39 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070040
41 const size_t input_height = state.range(0);
42 const size_t input_width = state.range(1);
43 const size_t kernel_height = state.range(2);
44 const size_t kernel_width = state.range(3);
45 const size_t padding_height = state.range(4);
46 const size_t padding_width = state.range(5);
47 const size_t subsampling = state.range(6);
48 const size_t dilation = state.range(7);
49 const size_t channels = state.range(8);
50
51 if (kernel_height != kh) {
52 state.SkipWithError("kernel height mismatch");
53 return;
54 }
55
56 if (kernel_width != kw) {
57 state.SkipWithError("kernel width mismatch");
58 return;
59 }
60
61 if (subsampling != s) {
62 state.SkipWithError("subsampling mismatch");
63 return;
64 }
65
66 if (padding_width % 2 != 0 || padding_width / 2 != pw) {
67 state.SkipWithError("padding width mismatch");
68 return;
69 }
70
71 if (dilation != 1) {
72 state.SkipWithError("unsupported dilation");
73 return;
74 }
75
76 std::random_device random_device;
77 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070078 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070079
80 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
81 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
82 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
83 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
84
85 const size_t inputSize = (input_height + padding_height) * input_width;
86 const size_t kernel_size = kernel_height * kernel_width;
87 const size_t output_size = output_height * output_width;
88
Marat Dukhanae7e8b22020-10-20 17:51:51 -070089 std::vector<float> input(inputSize * channels + 2 * XNN_EXTRA_BYTES);
XNNPACK Teamb455b122019-09-27 18:10:33 -070090 std::generate(input.begin(), input.end(), std::ref(f32rng));
91 std::vector<float> bias(channels);
92 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
93 std::vector<float> kernel(channels * kernel_size);
94 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
Erich Elsen4e5db3d2020-05-07 08:57:47 -070095 std::vector<float> zero(input_width + padding_width);
XNNPACK Teamb455b122019-09-27 18:10:33 -070096
97 const size_t w_elements = (kernel_size + 1) * channels;
98 const size_t o_elements = output_size * channels;
99 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -0700100 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700101 sizeof(float) * (w_elements + o_elements));
102
103 std::vector<float, AlignedAllocator<float, 32>> packed_weights(w_elements * num_buffers);
104 std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
105 for (size_t c = 0; c < channels; c++) {
106 packed_weights[c * kernel_size + c] = bias[c];
107 for (size_t i = 0; i < kernel_size; i++) {
108 packed_weights[c * kernel_size + c + 1 + i] = kernel[c * kernel_size + i];
109 }
110 }
111 for (size_t n = 1; n < num_buffers; n++) {
112 std::copy(packed_weights.cbegin(), packed_weights.cbegin() + w_elements, packed_weights.begin() + n * w_elements);
113 }
114
115 std::vector<float> output(o_elements * num_buffers);
116 std::fill(output.begin(), output.end(), std::nanf(""));
117
Marat Dukhan1f29b802020-05-15 23:46:39 -0700118 xnn_f32_chw_params chw_params =
119 xnn_init_f32_chw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700120
121 size_t buffer_index = 0;
122 for (auto _ : state) {
123 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700124 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700125 buffer_index = (buffer_index + 1) % num_buffers;
126 state.ResumeTiming();
127
128 for (uint32_t channel = 0; channel < channels; channel++) {
129 dwconv(
Marat Dukhan75157772020-10-21 01:46:28 -0700130 input_height, input_width * sizeof(float),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700131 input.data() + channel * inputSize,
132 packed_weights.data() + channel * (kernel_size + 1) + buffer_index * w_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700133 zero.data(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700134 output.data() + channel * output_size + buffer_index * o_elements,
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700135 padding_height / 2, // padding_top
Marat Dukhan1f29b802020-05-15 23:46:39 -0700136 &chw_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700137 }
138 }
139
140 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
141 state.counters["FLOPS"] = benchmark::Counter(
142 uint64_t(state.iterations()) * 2 * output_size * channels * kernel_size,
143 benchmark::Counter::kIsRate);
144
145 state.counters["BYTES"] = benchmark::Counter(
146 uint64_t(state.iterations()) * (output_size + inputSize + kernel_size + 1 /* bias */) * channels * sizeof(float),
147 benchmark::Counter::kIsRate);
148}
149
#if XNN_ARCH_ARM
  // NEON 3x3 kernels (kh=3, kw=3, pw=1, s=1). Each wrapper forwards to
  // DWConv2DBenchmark and is registered immediately after its definition; the
  // `net` argument is not used by the wrappers.
  // NOTE(review): no isa_check is passed for these kernels, unlike the SSSE3
  // wrappers in this file - confirm NEON is guaranteed on every XNN_ARCH_ARM target.
  static void dwconv2d_chw_3x3p1__neon_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4)

  static void dwconv2d_chw_3x3p1__neon_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_2x4)

  static void dwconv2d_chw_3x3p1__neon_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_3x4)

  static void dwconv2d_chw_3x3p1__neon_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_4x4)

  static void dwconv2d_chw_3x3p1__neon_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_5x4)

  static void dwconv2d_chw_3x3p1__neon_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_6x4)

  static void dwconv2d_chw_3x3p1__neon_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4_acc2)

  static void dwconv2d_chw_3x3p1__neon_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4_acc3)

  static void dwconv2d_chw_3x3p1__neon_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_1x4_acc4)

  static void dwconv2d_chw_3x3p1__neon_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neon_2x4_acc2)
#endif  // XNN_ARCH_ARM
193
#if XNN_ARCH_ARM64
  // NEONFMA kernels. Each wrapper forwards to DWConv2DBenchmark with the
  // kernel size (kh, kw), per-side padding width (pw) and subsampling (s) of
  // its micro-kernel, and is registered immediately after its definition; the
  // `net` argument is not used by the wrappers.

  // 3x3 kernel, padding 1, subsampling 1.
  static void dwconv2d_chw_3x3p1__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4)

  static void dwconv2d_chw_3x3p1__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_2x4)

  static void dwconv2d_chw_3x3p1__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_3x4)

  static void dwconv2d_chw_3x3p1__neonfma_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_4x4)

  static void dwconv2d_chw_3x3p1__neonfma_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_5x4)

  static void dwconv2d_chw_3x3p1__neonfma_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_6x4)

  static void dwconv2d_chw_3x3p1__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4_acc2)

  static void dwconv2d_chw_3x3p1__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4_acc3)

  static void dwconv2d_chw_3x3p1__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_1x4_acc4)

  static void dwconv2d_chw_3x3p1__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__neonfma_2x4_acc2)

  // 3x3 kernel, padding 1, subsampling 2.
  static void dwconv2d_chw_3x3s2p1__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4)

  static void dwconv2d_chw_3x3s2p1__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_2x4)

  static void dwconv2d_chw_3x3s2p1__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_3x4)

  static void dwconv2d_chw_3x3s2p1__neonfma_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_4x4)

  static void dwconv2d_chw_3x3s2p1__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4_acc2)

  static void dwconv2d_chw_3x3s2p1__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4_acc3)

  static void dwconv2d_chw_3x3s2p1__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_1x4_acc4)

  static void dwconv2d_chw_3x3s2p1__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__neonfma_2x4_acc2)

  // 5x5 kernel, padding 2, subsampling 1.
  static void dwconv2d_chw_5x5p2__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4)

  static void dwconv2d_chw_5x5p2__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_2x4)

  static void dwconv2d_chw_5x5p2__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_3x4)

  static void dwconv2d_chw_5x5p2__neonfma_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_4x4)

  static void dwconv2d_chw_5x5p2__neonfma_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_5x4)

  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc2)

  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc3)

  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc4)

  static void dwconv2d_chw_5x5p2__neonfma_1x4_acc5(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc5,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_1x4_acc5)

  static void dwconv2d_chw_5x5p2__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_2x4_acc2)

  static void dwconv2d_chw_5x5p2__neonfma_2x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_2x4_acc3)

  static void dwconv2d_chw_5x5p2__neonfma_3x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_3x4_acc2)

  static void dwconv2d_chw_5x5p2__neonfma_4x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__neonfma_4x4_acc2)

  // 5x5 kernel, padding 2, subsampling 2.
  static void dwconv2d_chw_5x5s2p2__neonfma_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_1x4)

  static void dwconv2d_chw_5x5s2p2__neonfma_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_2x4)

  static void dwconv2d_chw_5x5s2p2__neonfma_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_3x4)

  static void dwconv2d_chw_5x5s2p2__neonfma_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_1x4_acc2)

  static void dwconv2d_chw_5x5s2p2__neonfma_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_1x4_acc3)

  static void dwconv2d_chw_5x5s2p2__neonfma_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_1x4_acc4)

  static void dwconv2d_chw_5x5s2p2__neonfma_1x4_acc5(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_1x4_acc5)

  static void dwconv2d_chw_5x5s2p2__neonfma_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_2x4_acc2)

  static void dwconv2d_chw_5x5s2p2__neonfma_2x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_2x4_acc3)

  static void dwconv2d_chw_5x5s2p2__neonfma_3x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2,
                      /*kh=*/5, /*kw=*/5, /*pw=*/2, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__neonfma_3x4_acc2)
#endif  // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700367
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // SSE and SSSE3 kernels. Each wrapper forwards to DWConv2DBenchmark with the
  // kernel size (kh, kw), per-side padding width (pw) and subsampling (s) of
  // its micro-kernel, and is registered immediately after its definition; the
  // `net` argument is not used by the wrappers.

  // SSE: 3x3 kernel, padding 1, subsampling 1.
  static void dwconv2d_chw_3x3p1__sse_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4)

  static void dwconv2d_chw_3x3p1__sse_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_2x4)

  static void dwconv2d_chw_3x3p1__sse_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_3x4)

  static void dwconv2d_chw_3x3p1__sse_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_4x4)

  static void dwconv2d_chw_3x3p1__sse_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_5x4)

  static void dwconv2d_chw_3x3p1__sse_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_6x4)

  static void dwconv2d_chw_3x3p1__sse_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4_acc2)

  static void dwconv2d_chw_3x3p1__sse_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4_acc3)

  static void dwconv2d_chw_3x3p1__sse_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_1x4_acc4)

  static void dwconv2d_chw_3x3p1__sse_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__sse_2x4_acc2)

  // SSSE3: 3x3 kernel, padding 1, subsampling 1; gated by CheckSSSE3.
  static void dwconv2d_chw_3x3p1__ssse3_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4)

  static void dwconv2d_chw_3x3p1__ssse3_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_2x4)

  static void dwconv2d_chw_3x3p1__ssse3_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_3x4)

  static void dwconv2d_chw_3x3p1__ssse3_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_4x4)

  static void dwconv2d_chw_3x3p1__ssse3_5x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_5x4)

  static void dwconv2d_chw_3x3p1__ssse3_6x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_6x4)

  static void dwconv2d_chw_3x3p1__ssse3_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4_acc2)

  static void dwconv2d_chw_3x3p1__ssse3_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4_acc3)

  static void dwconv2d_chw_3x3p1__ssse3_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_1x4_acc4)

  static void dwconv2d_chw_3x3p1__ssse3_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/1,
                      /*isa_check=*/benchmark::utils::CheckSSSE3);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__ssse3_2x4_acc2)

  // SSE: 3x3 kernel, padding 1, subsampling 2.
  static void dwconv2d_chw_3x3s2p1__sse_1x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4)

  static void dwconv2d_chw_3x3s2p1__sse_2x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_2x4)

  static void dwconv2d_chw_3x3s2p1__sse_3x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_3x4)

  static void dwconv2d_chw_3x3s2p1__sse_4x4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_4x4)

  static void dwconv2d_chw_3x3s2p1__sse_1x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4_acc2)

  static void dwconv2d_chw_3x3s2p1__sse_1x4_acc3(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4_acc3)

  static void dwconv2d_chw_3x3s2p1__sse_1x4_acc4(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc4,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_1x4_acc4)

  static void dwconv2d_chw_3x3s2p1__sse_2x4_acc2(benchmark::State& state, const char* net) {
    DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2,
                      /*kh=*/3, /*kw=*/3, /*pw=*/1, /*s=*/2);
  }
  BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__sse_2x4_acc2)
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700487
Marat Dukhana199d492020-07-24 15:01:25 -0700488#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanbf715f92020-10-23 20:17:00 -0700489 static void dwconv2d_chw_3x3p1__psimd_1x4_acc3(benchmark::State& state, const char* net) {
490 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__psimd_1x4_acc3, 3, 3, 1, 1);
Erich Elsene6214af2020-06-10 22:17:22 -0700491 }
Marat Dukhanbf715f92020-10-23 20:17:00 -0700492 static void dwconv2d_chw_3x3s2p1__psimd_1x4_acc3(benchmark::State& state, const char* net) {
493 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__psimd_1x4_acc3, 3, 3, 1, 2);
Erich Elsenfd7a6e32020-06-11 12:04:44 -0700494 }
Marat Dukhanbf715f92020-10-23 20:17:00 -0700495 static void dwconv2d_chw_5x5p2__psimd_3x4(benchmark::State& state, const char* net) {
496 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__psimd_3x4, 5, 5, 2, 1);
Erich Elsen28928892020-06-12 08:08:19 -0700497 }
Marat Dukhanbf715f92020-10-23 20:17:00 -0700498 static void dwconv2d_chw_5x5s2p2__psimd_1x4_acc2(benchmark::State& state, const char* net) {
499 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__psimd_1x4_acc2, 5, 5, 2, 2);
Erich Elsen7465a892020-06-13 14:02:04 -0700500 }
501
Marat Dukhanbf715f92020-10-23 20:17:00 -0700502 BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__psimd_1x4_acc3)
503 BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__psimd_1x4_acc3)
504 BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__psimd_3x4)
505 BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__psimd_1x4_acc2)
Marat Dukhana199d492020-07-24 15:01:25 -0700506#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Erich Elsene6214af2020-06-10 22:17:22 -0700507
Marat Dukhan91249d22020-10-24 12:02:51 -0700508static void dwconv2d_chw_3x3p1__scalar_1x1(benchmark::State& state, const char* net) {
509 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1, 3, 3, 1, 1);
510}
511static void dwconv2d_chw_3x3p1__scalar_2x1(benchmark::State& state, const char* net) {
512 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1, 3, 3, 1, 1);
513}
514static void dwconv2d_chw_3x3p1__scalar_3x1(benchmark::State& state, const char* net) {
515 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1, 3, 3, 1, 1);
516}
517static void dwconv2d_chw_3x3p1__scalar_4x1(benchmark::State& state, const char* net) {
518 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1, 3, 3, 1, 1);
519}
520static void dwconv2d_chw_3x3p1__scalar_5x1(benchmark::State& state, const char* net) {
521 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1, 3, 3, 1, 1);
522}
523static void dwconv2d_chw_3x3p1__scalar_6x1(benchmark::State& state, const char* net) {
524 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1, 3, 3, 1, 1);
525}
526static void dwconv2d_chw_3x3p1__scalar_1x1_acc2(benchmark::State& state, const char* net) {
527 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2, 3, 3, 1, 1);
528}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700529static void dwconv2d_chw_3x3p1__scalar_1x1_acc3(benchmark::State& state, const char* net) {
530 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3, 3, 3, 1, 1);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700531}
Marat Dukhan91249d22020-10-24 12:02:51 -0700532static void dwconv2d_chw_3x3p1__scalar_1x1_acc4(benchmark::State& state, const char* net) {
533 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4, 3, 3, 1, 1);
534}
535static void dwconv2d_chw_3x3p1__scalar_2x1_acc2(benchmark::State& state, const char* net) {
536 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2, 3, 3, 1, 1);
537}
Erich Elsen0cc2c532019-10-15 04:44:18 -0700538
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700539static void dwconv2d_chw_3x3s2p1__scalar_1x1(benchmark::State& state, const char* net) {
540 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1, 3, 3, 1, 2);
541}
542static void dwconv2d_chw_3x3s2p1__scalar_2x1(benchmark::State& state, const char* net) {
543 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1, 3, 3, 1, 2);
544}
545static void dwconv2d_chw_3x3s2p1__scalar_3x1(benchmark::State& state, const char* net) {
546 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1, 3, 3, 1, 2);
547}
548static void dwconv2d_chw_3x3s2p1__scalar_4x1(benchmark::State& state, const char* net) {
549 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1, 3, 3, 1, 2);
550}
551static void dwconv2d_chw_3x3s2p1__scalar_1x1_acc2(benchmark::State& state, const char* net) {
552 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2, 3, 3, 1, 2);
553}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700554static void dwconv2d_chw_3x3s2p1__scalar_1x1_acc3(benchmark::State& state, const char* net) {
555 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3, 3, 3, 1, 2);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700556}
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700557static void dwconv2d_chw_3x3s2p1__scalar_1x1_acc4(benchmark::State& state, const char* net) {
558 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4, 3, 3, 1, 2);
559}
560static void dwconv2d_chw_3x3s2p1__scalar_2x1_acc2(benchmark::State& state, const char* net) {
561 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2, 3, 3, 1, 2);
562}
Erich Elsen38709a62019-11-08 11:58:45 -0800563
Marat Dukhanc4efb002020-10-25 23:14:47 -0700564static void dwconv2d_chw_5x5p2__scalar_1x1(benchmark::State& state, const char* net) {
565 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1, 5, 5, 2, 1);
566}
567static void dwconv2d_chw_5x5p2__scalar_2x1(benchmark::State& state, const char* net) {
568 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1, 5, 5, 2, 1);
569}
570static void dwconv2d_chw_5x5p2__scalar_3x1(benchmark::State& state, const char* net) {
571 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1, 5, 5, 2, 1);
572}
573static void dwconv2d_chw_5x5p2__scalar_1x1_acc2(benchmark::State& state, const char* net) {
574 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2, 5, 5, 2, 1);
575}
576static void dwconv2d_chw_5x5p2__scalar_1x1_acc3(benchmark::State& state, const char* net) {
577 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3, 5, 5, 2, 1);
578}
579static void dwconv2d_chw_5x5p2__scalar_1x1_acc4(benchmark::State& state, const char* net) {
580 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4, 5, 5, 2, 1);
581}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700582static void dwconv2d_chw_5x5p2__scalar_1x1_acc5(benchmark::State& state, const char* net) {
583 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5, 5, 5, 2, 1);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700584}
Marat Dukhanc4efb002020-10-25 23:14:47 -0700585static void dwconv2d_chw_5x5p2__scalar_2x1_acc2(benchmark::State& state, const char* net) {
586 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2, 5, 5, 2, 1);
587}
588static void dwconv2d_chw_5x5p2__scalar_2x1_acc3(benchmark::State& state, const char* net) {
589 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3, 5, 5, 2, 1);
590}
591static void dwconv2d_chw_5x5p2__scalar_3x1_acc2(benchmark::State& state, const char* net) {
592 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2, 5, 5, 2, 1);
593}
Erich Elsenac4de802019-10-16 04:35:30 -0700594
Marat Dukhan29c0c332020-10-28 22:11:00 -0700595static void dwconv2d_chw_5x5s2p2__scalar_1x1(benchmark::State& state, const char* net) {
596 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1, 5, 5, 2, 2);
597}
598static void dwconv2d_chw_5x5s2p2__scalar_2x1(benchmark::State& state, const char* net) {
599 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1, 5, 5, 2, 2);
600}
601static void dwconv2d_chw_5x5s2p2__scalar_3x1(benchmark::State& state, const char* net) {
602 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1, 5, 5, 2, 2);
603}
604static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc2(benchmark::State& state, const char* net) {
605 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2, 5, 5, 2, 2);
606}
607static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc3(benchmark::State& state, const char* net) {
608 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3, 5, 5, 2, 2);
609}
610static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc4(benchmark::State& state, const char* net) {
611 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4, 5, 5, 2, 2);
612}
Marat Dukhanbf715f92020-10-23 20:17:00 -0700613static void dwconv2d_chw_5x5s2p2__scalar_1x1_acc5(benchmark::State& state, const char* net) {
614 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5, 5, 5, 2, 2);
Marat Dukhanae7e8b22020-10-20 17:51:51 -0700615}
Marat Dukhan29c0c332020-10-28 22:11:00 -0700616static void dwconv2d_chw_5x5s2p2__scalar_2x1_acc2(benchmark::State& state, const char* net) {
617 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2, 5, 5, 2, 2);
618}
619static void dwconv2d_chw_5x5s2p2__scalar_2x1_acc3(benchmark::State& state, const char* net) {
620 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3, 5, 5, 2, 2);
621}
622static void dwconv2d_chw_5x5s2p2__scalar_3x1_acc2(benchmark::State& state, const char* net) {
623 DWConv2DBenchmark(state, xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2, 5, 5, 2, 2);
624}
Erich Elsen38709a62019-11-08 11:58:45 -0800625
Marat Dukhan91249d22020-10-24 12:02:51 -0700626BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1)
627BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_2x1)
628BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_3x1)
629BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_4x1)
630BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_5x1)
631BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_6x1)
632BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1_acc2)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700633BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1_acc3)
Marat Dukhan91249d22020-10-24 12:02:51 -0700634BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_1x1_acc4)
635BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__scalar_2x1_acc2)
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700636
637BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1)
638BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_2x1)
639BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_3x1)
640BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_4x1)
641BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1_acc2)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700642BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1_acc3)
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700643BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_1x1_acc4)
644BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__scalar_2x1_acc2)
645
Marat Dukhanc4efb002020-10-25 23:14:47 -0700646BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1)
647BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_2x1)
648BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_3x1)
649BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc2)
650BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc3)
651BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc4)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700652BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_1x1_acc5)
Marat Dukhanc4efb002020-10-25 23:14:47 -0700653BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_2x1_acc2)
654BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_2x1_acc3)
655BENCHMARK_DWCONV(dwconv2d_chw_5x5p2__scalar_3x1_acc2)
Marat Dukhancf5b3c32020-10-25 19:21:10 -0700656
Marat Dukhan29c0c332020-10-28 22:11:00 -0700657BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1)
658BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_2x1)
659BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_3x1)
660BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc2)
661BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc3)
662BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc4)
Marat Dukhanbf715f92020-10-23 20:17:00 -0700663BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_1x1_acc5)
Marat Dukhan29c0c332020-10-28 22:11:00 -0700664BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_2x1_acc2)
665BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_2x1_acc3)
666BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__scalar_3x1_acc2)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700667
668#ifndef XNNPACK_BENCHMARK_NO_MAIN
669BENCHMARK_MAIN();
670#endif