blob: b0629d7dd7472a01c3d98f40d233f3a08e19d8c5 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cfloat>
11#include <cmath>
12#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070013#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <ostream>
15#include <random>
16#include <string>
17#include <vector>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack.h>
20
Frank Barchardbb4c18b2019-09-30 11:05:52 -070021#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
22#include "arm_compute/core/Types.h"
23#include "arm_compute/runtime/Tensor.h"
24#include "arm_compute/runtime/CPP/CPPScheduler.h"
25#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
26#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
27#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
XNNPACK Teamb455b122019-09-27 18:10:33 -070028#include <benchmark/benchmark.h>
Frank Barchard49b4dcc2020-06-26 14:07:19 -070029#include <fp16.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070030#ifdef BENCHMARK_TENSORFLOW_LITE
31#include "flatbuffers/include/flatbuffers/flatbuffers.h"
32#include "tensorflow/lite/interpreter.h"
33#include "tensorflow/lite/kernels/register.h"
34#include "tensorflow/lite/model.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070035#include "tensorflow/lite/schema/schema_generated.h"
36#include "tensorflow/lite/version.h"
37#endif // BENCHMARK_TENSORFLOW_LITE
Frank Barchardbb4c18b2019-09-30 11:05:52 -070038#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070039
Chao Meic6640272020-07-23 09:35:11 -070040#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan08b7a972020-07-14 18:17:29 -070041void xnnpack_convolution_qu8(benchmark::State& state, const char* net) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070042 const size_t batch_size = state.range(0);
43 const size_t input_height = state.range(1);
44 const size_t input_width = state.range(2);
45 const size_t kernel_height = state.range(3);
46 const size_t kernel_width = state.range(4);
47 const size_t padding_height = state.range(5);
48 const size_t padding_width = state.range(6);
49 const size_t subsampling = state.range(7);
50 const size_t dilation = state.range(8);
51 const size_t groups = state.range(9);
52 const size_t group_input_channels = state.range(10);
53 const size_t group_output_channels = state.range(11);
54
55 std::random_device random_device;
56 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070057 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
Marat Dukhan44f0ca72020-08-02 21:46:58 -070058 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070059
60 const size_t output_pixel_stride = groups * group_output_channels;
61 const size_t input_pixel_stride = groups * group_input_channels;
62 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
63 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
64 const size_t padding_left = padding_width / 2;
65 const size_t padding_top = padding_height / 2;
66 const size_t padding_right = padding_width - padding_left;
67 const size_t padding_bottom = padding_height - padding_top;
68 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
69 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
70
71 std::vector<uint8_t> input(batch_size * input_height * input_width * input_pixel_stride);
72 std::generate(input.begin(), input.end(), std::ref(u8rng));
73 std::vector<uint8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
74 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
75 std::vector<int32_t> bias(groups * group_output_channels);
Marat Dukhanecd83112020-08-03 21:50:28 -070076 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070077 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
78
Marat Dukhan04f03be2019-11-19 12:36:47 -080079 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070080 if (status != xnn_status_success) {
81 state.SkipWithError("failed to initialize XNNPACK");
82 return;
83 }
84
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070086 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070087 sizeof(uint8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(uint8_t) * output_elements);
88 std::vector<uint8_t> output(output_elements * num_buffers);
89
90 std::vector<xnn_operator_t> convolution_operators(num_buffers);
91 for (xnn_operator_t& convolution_op : convolution_operators) {
Marat Dukhan08b7a972020-07-14 18:17:29 -070092 status = xnn_create_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 padding_top, padding_right, padding_bottom, padding_left,
94 kernel_height, kernel_width,
95 subsampling, subsampling,
96 dilation, dilation,
97 groups, group_input_channels, group_output_channels,
98 input_pixel_stride, output_pixel_stride,
99 127, 0.5f,
100 127, 0.5f,
101 kernel.data(), bias.data(),
102 127, 0.5f, 0, 255,
103 0 /* flags */, &convolution_op);
104 if (status != xnn_status_success) {
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700105 state.SkipWithError("failed to create QUINT8 Convolution operator");
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106 return;
107 }
108 }
109
110 for (size_t i = 0; i < convolution_operators.size(); i++) {
Marat Dukhan08b7a972020-07-14 18:17:29 -0700111 status = xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700112 convolution_operators[i],
113 batch_size, input_height, input_width,
114 input.data(), output.data() + i * output_elements,
115 nullptr /* thread pool */);
116 if (status != xnn_status_success) {
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700117 state.SkipWithError("failed to setup QUINT8 Convolution operator");
118 return;
119 }
120 }
121
122 size_t buffer_index = 0;
123 for (auto _ : state) {
124 state.PauseTiming();
125 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
126 buffer_index = (buffer_index + 1) % num_buffers;
127 state.ResumeTiming();
128
129 status = xnn_run_operator(convolution_operators[buffer_index],
130 nullptr /* thread pool */);
131 if (status != xnn_status_success) {
132 state.SkipWithError("failed to run QUINT8 Convolution operator");
133 return;
134 }
135 }
136
137 for (xnn_operator_t& convolution_op : convolution_operators) {
138 status = xnn_delete_operator(convolution_op);
139 if (status != xnn_status_success) {
140 state.SkipWithError("failed to delete QUINT8 Convolution operator");
141 return;
142 }
143 convolution_op = nullptr;
144 }
145
146 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
147 state.counters["OPS"] = benchmark::Counter(
148 uint64_t(state.iterations()) * 2 *
149 batch_size * output_height * output_width *
150 groups * group_input_channels * group_output_channels *
151 kernel_height * kernel_width,
152 benchmark::Counter::kIsRate);
153}
154#endif // XNN_NO_QU8_OPERATORS
155
156#ifndef XNN_NO_QS8_OPERATORS
157void xnnpack_convolution_qs8(benchmark::State& state, const char* net) {
158 const size_t batch_size = state.range(0);
159 const size_t input_height = state.range(1);
160 const size_t input_width = state.range(2);
161 const size_t kernel_height = state.range(3);
162 const size_t kernel_width = state.range(4);
163 const size_t padding_height = state.range(5);
164 const size_t padding_width = state.range(6);
165 const size_t subsampling = state.range(7);
166 const size_t dilation = state.range(8);
167 const size_t groups = state.range(9);
168 const size_t group_input_channels = state.range(10);
169 const size_t group_output_channels = state.range(11);
170
171 std::random_device random_device;
172 auto rng = std::mt19937(random_device());
173 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
174 auto i8rng = std::bind(
175 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
176
177 const size_t output_pixel_stride = groups * group_output_channels;
178 const size_t input_pixel_stride = groups * group_input_channels;
179 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
180 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
181 const size_t padding_left = padding_width / 2;
182 const size_t padding_top = padding_height / 2;
183 const size_t padding_right = padding_width - padding_left;
184 const size_t padding_bottom = padding_height - padding_top;
185 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
186 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
187
188 std::vector<int8_t> input(batch_size * input_height * input_width * input_pixel_stride);
189 std::generate(input.begin(), input.end(), std::ref(i8rng));
190 std::vector<int8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
191 std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
192 std::vector<int32_t> bias(groups * group_output_channels);
193 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
194 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
195
196 xnn_status status = xnn_initialize(nullptr /* allocator */);
197 if (status != xnn_status_success) {
198 state.SkipWithError("failed to initialize XNNPACK");
199 return;
200 }
201
202 const size_t num_buffers = 1 +
203 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
204 sizeof(int8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(int8_t) * output_elements);
205 std::vector<int8_t> output(output_elements * num_buffers);
206
207 std::vector<xnn_operator_t> convolution_operators(num_buffers);
208 for (xnn_operator_t& convolution_op : convolution_operators) {
209 status = xnn_create_convolution2d_nhwc_qs8(
210 padding_top, padding_right, padding_bottom, padding_left,
211 kernel_height, kernel_width,
212 subsampling, subsampling,
213 dilation, dilation,
214 groups, group_input_channels, group_output_channels,
215 input_pixel_stride, output_pixel_stride,
216 127, 0.5f, 0.5f,
217 kernel.data(), bias.data(),
218 127, 0.5f, -128, 127,
219 0 /* flags */, &convolution_op);
220 if (status != xnn_status_success) {
221 state.SkipWithError("failed to create QINT8 Convolution operator");
222 return;
223 }
224 }
225
226 for (size_t i = 0; i < convolution_operators.size(); i++) {
227 status = xnn_setup_convolution2d_nhwc_qs8(
228 convolution_operators[i],
229 batch_size, input_height, input_width,
230 input.data(), output.data() + i * output_elements,
231 nullptr /* thread pool */);
232 if (status != xnn_status_success) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700233 state.SkipWithError("failed to setup QINT8 Convolution operator");
234 return;
235 }
236 }
237
238 size_t buffer_index = 0;
239 for (auto _ : state) {
240 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700241 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700242 buffer_index = (buffer_index + 1) % num_buffers;
243 state.ResumeTiming();
244
245 status = xnn_run_operator(convolution_operators[buffer_index],
246 nullptr /* thread pool */);
247 if (status != xnn_status_success) {
248 state.SkipWithError("failed to run QINT8 Convolution operator");
249 return;
250 }
251 }
252
253 for (xnn_operator_t& convolution_op : convolution_operators) {
254 status = xnn_delete_operator(convolution_op);
255 if (status != xnn_status_success) {
256 state.SkipWithError("failed to delete QINT8 Convolution operator");
257 return;
258 }
259 convolution_op = nullptr;
260 }
261
Frank Barchardbb4c18b2019-09-30 11:05:52 -0700262 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700263 state.counters["OPS"] = benchmark::Counter(
264 uint64_t(state.iterations()) * 2 *
265 batch_size * output_height * output_width *
266 groups * group_input_channels * group_output_channels *
267 kernel_height * kernel_width,
268 benchmark::Counter::kIsRate);
269}
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700270#endif // XNN_NO_QS8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700271
Chao Meic6640272020-07-23 09:35:11 -0700272#ifndef XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700273void xnnpack_convolution_f16(benchmark::State& state, const char* net) {
274 if (!benchmark::utils::CheckNEONFP16ARITH(state)) {
275 return;
276 }
277 const size_t batch_size = state.range(0);
278 const size_t input_height = state.range(1);
279 const size_t input_width = state.range(2);
280 const size_t kernel_height = state.range(3);
281 const size_t kernel_width = state.range(4);
282 const size_t padding_height = state.range(5);
283 const size_t padding_width = state.range(6);
284 const size_t subsampling = state.range(7);
285 const size_t dilation = state.range(8);
286 const size_t groups = state.range(9);
287 const size_t group_input_channels = state.range(10);
288 const size_t group_output_channels = state.range(11);
289
290 std::random_device random_device;
291 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700292 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700293 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
294
295 const size_t output_pixel_stride = groups * group_output_channels;
296 const size_t input_pixel_stride = groups * group_input_channels;
297 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
298 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
299 const size_t padding_left = padding_width / 2;
300 const size_t padding_top = padding_height / 2;
301 const size_t padding_right = padding_width - padding_left;
302 const size_t padding_bottom = padding_height - padding_top;
303 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
304 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
305
306 std::vector<uint16_t> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(uint16_t));
307 std::generate(input.begin(), input.end(), std::ref(f16rng));
308 std::vector<uint16_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
309 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
310 std::vector<uint16_t> bias(groups * group_output_channels);
311 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
312 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
313
314 xnn_status status = xnn_initialize(nullptr /* allocator */);
315 if (status != xnn_status_success) {
316 state.SkipWithError("failed to initialize XNNPACK");
317 return;
318 }
319
320 const size_t num_buffers = 1 +
321 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
322 sizeof(uint16_t) * (kernel.size() + bias.size() + output_elements));
323 std::vector<uint16_t> output(output_elements * num_buffers);
324
325 std::vector<xnn_operator_t> convolution_operators(num_buffers);
326 for (xnn_operator_t& convolution_op : convolution_operators) {
327 status = xnn_create_convolution2d_nhwc_f16(
328 padding_top, padding_right, padding_bottom, padding_left,
329 kernel_height, kernel_width,
330 subsampling, subsampling,
331 dilation, dilation,
332 groups, group_input_channels, group_output_channels,
333 input_pixel_stride, output_pixel_stride,
334 kernel.data(), bias.data(),
335 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
336 0 /* flags */, &convolution_op);
337 if (status != xnn_status_success) {
338 state.SkipWithError("failed to create FP16 Convolution operator");
339 return;
340 }
341 }
342
343 for (size_t i = 0; i < convolution_operators.size(); i++) {
344 status = xnn_setup_convolution2d_nhwc_f16(
345 convolution_operators[i],
346 batch_size, input_height, input_width,
347 input.data(), output.data() + i * output_elements,
348 nullptr /* thread pool */);
349 if (status != xnn_status_success) {
350 state.SkipWithError("failed to setup FP16 Convolution operator");
351 return;
352 }
353 }
354
355 size_t buffer_index = 0;
356 for (auto _ : state) {
357 state.PauseTiming();
358 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint16_t));
359 buffer_index = (buffer_index + 1) % num_buffers;
360 state.ResumeTiming();
361
362 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
363 if (status != xnn_status_success) {
364 state.SkipWithError("failed to run FP16 Convolution operator");
365 return;
366 }
367 }
368
369 for (xnn_operator_t& convolution_op : convolution_operators) {
370 status = xnn_delete_operator(convolution_op);
371 if (status != xnn_status_success) {
372 state.SkipWithError("failed to delete FP16 Convolution operator");
373 return;
374 }
375 convolution_op = nullptr;
376 }
377
378 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
379 state.counters["FLOPS"] = benchmark::Counter(
380 uint64_t(state.iterations()) * 2 *
381 batch_size * output_height * output_width *
382 groups * group_input_channels * group_output_channels *
383 kernel_height * kernel_width,
384 benchmark::Counter::kIsRate);
385}
Chao Meic6640272020-07-23 09:35:11 -0700386#endif // XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700387
XNNPACK Teamb455b122019-09-27 18:10:33 -0700388void xnnpack_convolution_f32(benchmark::State& state, const char* net) {
389 const size_t batch_size = state.range(0);
390 const size_t input_height = state.range(1);
391 const size_t input_width = state.range(2);
392 const size_t kernel_height = state.range(3);
393 const size_t kernel_width = state.range(4);
394 const size_t padding_height = state.range(5);
395 const size_t padding_width = state.range(6);
396 const size_t subsampling = state.range(7);
397 const size_t dilation = state.range(8);
398 const size_t groups = state.range(9);
399 const size_t group_input_channels = state.range(10);
400 const size_t group_output_channels = state.range(11);
401
402 std::random_device random_device;
403 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700404 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700405
406 const size_t output_pixel_stride = groups * group_output_channels;
407 const size_t input_pixel_stride = groups * group_input_channels;
408 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
409 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
410 const size_t padding_left = padding_width / 2;
411 const size_t padding_top = padding_height / 2;
412 const size_t padding_right = padding_width - padding_left;
413 const size_t padding_bottom = padding_height - padding_top;
414 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
415 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
416
417 std::vector<float> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
418 std::generate(input.begin(), input.end(), std::ref(f32rng));
419 std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
420 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
421 std::vector<float> bias(groups * group_output_channels);
422 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
423 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
424
Marat Dukhan04f03be2019-11-19 12:36:47 -0800425 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700426 if (status != xnn_status_success) {
427 state.SkipWithError("failed to initialize XNNPACK");
428 return;
429 }
430
XNNPACK Teamb455b122019-09-27 18:10:33 -0700431 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -0700432 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700433 sizeof(float) * (kernel.size() + bias.size() + output_elements));
434 std::vector<float> output(output_elements * num_buffers);
435
436 std::vector<xnn_operator_t> convolution_operators(num_buffers);
437 for (xnn_operator_t& convolution_op : convolution_operators) {
438 status = xnn_create_convolution2d_nhwc_f32(
439 padding_top, padding_right, padding_bottom, padding_left,
440 kernel_height, kernel_width,
441 subsampling, subsampling,
442 dilation, dilation,
443 groups, group_input_channels, group_output_channels,
444 input_pixel_stride, output_pixel_stride,
445 kernel.data(), bias.data(),
446 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
447 0 /* flags */, &convolution_op);
448 if (status != xnn_status_success) {
449 state.SkipWithError("failed to create FP32 Convolution operator");
450 return;
451 }
452 }
453
454 for (size_t i = 0; i < convolution_operators.size(); i++) {
455 status = xnn_setup_convolution2d_nhwc_f32(
456 convolution_operators[i],
457 batch_size, input_height, input_width,
458 input.data(), output.data() + i * output_elements,
459 nullptr /* thread pool */);
460 if (status != xnn_status_success) {
461 state.SkipWithError("failed to setup FP32 Convolution operator");
462 return;
463 }
464 }
465
466 size_t buffer_index = 0;
467 for (auto _ : state) {
468 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700469 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700470 buffer_index = (buffer_index + 1) % num_buffers;
471 state.ResumeTiming();
472
473 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
474 if (status != xnn_status_success) {
475 state.SkipWithError("failed to run FP32 Convolution operator");
476 return;
477 }
478 }
479
480 for (xnn_operator_t& convolution_op : convolution_operators) {
481 status = xnn_delete_operator(convolution_op);
482 if (status != xnn_status_success) {
483 state.SkipWithError("failed to delete FP32 Convolution operator");
484 return;
485 }
486 convolution_op = nullptr;
487 }
488
489 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
490 state.counters["FLOPS"] = benchmark::Counter(
491 uint64_t(state.iterations()) * 2 *
492 batch_size * output_height * output_width *
493 groups * group_input_channels * group_output_channels *
494 kernel_height * kernel_width,
495 benchmark::Counter::kIsRate);
496}
497
498#ifdef BENCHMARK_TENSORFLOW_LITE
// Benchmarks an equivalent FP32 convolution through the TensorFlow Lite
// interpreter, as a baseline for the XNNPACK operator benchmarks above.
// Builds a single-operator TFLite model in memory via FlatBuffers, then
// times Invoke(). Uses the same state.range() layout as the XNNPACK variants:
//   0 batch, 1 input height, 2 input width, 3 kernel height, 4 kernel width,
//   5 total padding (height), 6 total padding (width), 7 subsampling (stride),
//   8 dilation, 9 groups, 10 input channels per group, 11 output channels per group.
void tflite_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  // TFLite has no general grouped convolution: groups != 1 is only
  // expressible as DEPTHWISE_CONV_2D (one input channel per group).
  bool is_depthwise = false;
  if (groups != 1) {
    if (group_input_channels == 1) {
      is_depthwise = true;
    } else {
      state.SkipWithError("grouped convolution is not supported");
      return;
    }
  }

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;

  // TFLite only supports SAME/VALID padding modes, so the explicit padding
  // amounts must match one of them exactly; otherwise the case is skipped.
  tflite::Padding padding = tflite::Padding_VALID;
  if (padding_width == (effective_kernel_width - 1) && padding_height == (effective_kernel_height - 1)) {
    padding = tflite::Padding_SAME;
  } else if (padding_width == 0 && padding_height == 0) {
    padding = tflite::Padding_VALID;
  } else {
    state.SkipWithError("unsupported padding");
    return;
  }

  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
  std::vector<float> bias(groups * group_output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(f32rng));

  // --- Build the single-op model flatbuffer in memory. ---
  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<tflite::OperatorCode> operator_code =
      CreateOperatorCode(
          builder,
          is_depthwise ? tflite::BuiltinOperator_DEPTHWISE_CONV_2D : tflite::BuiltinOperator_CONV_2D,
          0);

  flatbuffers::Offset<tflite::Conv2DOptions> conv2d_options = CreateConv2DOptions(
      builder,
      padding,
      static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
      tflite::ActivationFunctionType_NONE,
      static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));

  // For the depthwise variant, group_output_channels acts as the depth
  // multiplier.
  flatbuffers::Offset<tflite::DepthwiseConv2DOptions> dwconv2d_options = CreateDepthwiseConv2DOptions(
      builder,
      padding,
      static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
      static_cast<int32_t>(group_output_channels),
      tflite::ActivationFunctionType_NONE,
      static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));

  // Buffer 0 is the shared empty buffer for dynamically-filled tensors
  // (input/output); buffers 1 and 2 hold the constant filter and bias data.
  flatbuffers::Offset<tflite::Buffer> buffers[3] = {
    tflite::CreateBuffer(builder, builder.CreateVector({})),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(kernel.data()),
      sizeof(float) * kernel.size())),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(bias.data()),
      sizeof(float) * bias.size())),
  };

  // NHWC tensor shapes.
  const int32_t input_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(input_height),
    static_cast<int32_t>(input_width),
    static_cast<int32_t>(groups * group_input_channels)
  };
  const int32_t output_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(output_height),
    static_cast<int32_t>(output_width),
    static_cast<int32_t>(groups * group_output_channels)
  };
  const int32_t filter_shape[4] = {
    static_cast<int32_t>(group_output_channels),
    static_cast<int32_t>(kernel_height),
    static_cast<int32_t>(kernel_width),
    static_cast<int32_t>(groups * group_input_channels)
  };
  const int32_t bias_shape[1] = {
    static_cast<int32_t>(groups * group_output_channels)
  };

  // Tensor ids: 0 input, 1 filter, 2 bias, 3 output (referenced by the
  // operator and graph I/O lists below).
  flatbuffers::Offset<tflite::Tensor> tensors[4] = {
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(input_shape, 4),
                         tflite::TensorType_FLOAT32,
                         0 /* buffer id */,
                         builder.CreateString("input")),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(filter_shape, 4),
                         tflite::TensorType_FLOAT32,
                         1 /* buffer id */,
                         builder.CreateString("filter")),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(bias_shape, 1),
                         tflite::TensorType_FLOAT32,
                         2 /* buffer id */,
                         builder.CreateString("bias")),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(output_shape, 4),
                         tflite::TensorType_FLOAT32,
                         0 /* buffer id */,
                         builder.CreateString("output")),
  };

  const int32_t op_inputs[3] = { 0, 1, 2 };
  const int32_t op_outputs[1] = { 3 };
  flatbuffers::Offset<tflite::Operator> op = CreateOperator(
      builder,
      0 /* opcode_index */,
      builder.CreateVector<int32_t>(op_inputs, 3),
      builder.CreateVector<int32_t>(op_outputs, 1),
      is_depthwise ? tflite::BuiltinOptions_DepthwiseConv2DOptions : tflite::BuiltinOptions_Conv2DOptions,
      is_depthwise ? dwconv2d_options.Union() : conv2d_options.Union(),
      /*custom_options */ 0,
      tflite::CustomOptionsFormat_FLEXBUFFERS);

  const int32_t graph_inputs[1] = { 0 };
  const int32_t graph_outputs[1] = { 3 };
  flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
      builder,
      builder.CreateVector(tensors, 4),
      builder.CreateVector<int32_t>(graph_inputs, 1),
      builder.CreateVector<int32_t>(graph_outputs, 1),
      builder.CreateVector(&op, 1),
      builder.CreateString("Conv2D subgraph"));

  flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("Conv2D model");

  flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
      TFLITE_SCHEMA_VERSION,
      builder.CreateVector(&operator_code, 1),
      builder.CreateVector(&subgraph, 1),
      description,
      builder.CreateVector(buffers, 3));

  builder.Finish(model_buffer);

  // --- Instantiate a single-threaded interpreter over the model. ---
  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Fill the input tensor (tensor id 0) with random data once, up front.
  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size * groups * group_input_channels * input_height * input_width,
    std::ref(f32rng));

  for (auto _ : state) {
    state.PauseTiming();
    // Flush caches, then warm only the input into L1, so every timed Invoke
    // starts from the same cache state.
    benchmark::utils::WipeCache();
    benchmark::utils::PrefetchToL1(
      interpreter->typed_tensor<float>(0),
      batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  // 2 FLOPs (multiply + accumulate) per MAC.
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
    batch_size * output_height * output_width *
    groups * group_input_channels * group_output_channels *
    kernel_height * kernel_width,
    benchmark::Counter::kIsRate);

  interpreter.reset();
}
706#endif // BENCHMARK_TENSORFLOW_LITE
707
708#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
// Computes a naive direct convolution on the raw input/kernel/bias buffers and
// compares it element-wise against `output`, the result produced by the
// benchmarked backend.
//
// Expected layouts (matching the benchmark setup in this file): input is NHWC
// with C = groups * group_input_channels, kernel is OHWI with the input
// channels folded as groups * group_input_channels, bias is a flat
// [groups * group_output_channels] vector, and output is NHWC with
// C = groups * group_output_channels. The convolution shape itself is
// re-read from the benchmark arguments carried in `state`.
//
// Returns an empty string when every output element is within tolerance of the
// reference, or a human-readable description of the first mismatch otherwise.
static std::string compare_with_convolution_f32_reference_output(
  const benchmark::State& state, const float* input, size_t input_size,
  const float* kernel, size_t kernel_size, const float* bias, size_t bias_size,
  const float* output, size_t output_size)
{
  // Benchmark arguments, in the order registered by the shape functions below:
  // N, H, W, KH, KW, PH, PW, S(ubsampling), D(ilation), G(roups), GCin, GCout.
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  // Standard convolution output-size arithmetic (floor division), using the
  // dilated ("effective") kernel extent and the total padding per dimension.
  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
  const size_t input_pixel_stride = groups * group_input_channels;
  // Odd total padding puts the extra pixel on the right/bottom edge.
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;

  // Sanity-check that the caller's buffer sizes agree with the shape encoded
  // in the benchmark arguments.
  assert(input_size == batch_size * input_height * input_width * groups * group_input_channels);

  assert(kernel_size == group_output_channels * kernel_height * kernel_width * groups * group_input_channels);

  assert(bias_size == groups * group_output_channels);

  assert(output_size == batch_size * output_height * output_width * groups * group_output_channels);

  // Initialize the reference output with the bias, broadcast over all pixels.
  std::vector<float> output_ref(output_size);
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t g = 0; g < groups; g++) {
          for (size_t oc = 0; oc < group_output_channels; oc++) {
            output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] =
              bias[g * group_output_channels + oc];
          }
        }
      }
    }
  }
  // Accumulate the direct (non-optimized) convolution into output_ref.
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t ky = 0; ky < kernel_height; ky++) {
          // iy/ix are computed in unsigned arithmetic: a logically negative
          // coordinate (falling into the top/left padding) wraps around to a
          // huge value, so the single `< input_height` / `< input_width` test
          // below rejects both out-of-range sides at once.
          const size_t iy = oy * subsampling + ky * dilation - padding_top;
          if (iy < input_height) {
            for (size_t kx = 0; kx < kernel_width; kx++) {
              const size_t ix = ox * subsampling + kx * dilation - padding_left;
              if (ix < input_width) {
                for (size_t g = 0; g < groups; g++) {
                  for (size_t oc = 0; oc < group_output_channels; oc++) {
                    for (size_t ic = 0; ic < group_input_channels; ic++) {
                      output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
                        input[((i * input_height + iy) * input_width + ix) * input_pixel_stride + g * group_input_channels + ic] *
                        kernel[(((oc * kernel_height + ky) * kernel_width + kx) * groups + g) * group_input_channels + ic];
                    } // group_input_channels loop
                  } // group_output_channels loop
                } // groups loop
              }
            } // kernel_width loop
          }
        } // kernel_height loop
      } // output_width loop
    } // output_height loop
  } // batch_size loop

  // Accept up to 1e-4 relative error, with an absolute floor of FLT epsilon so
  // reference values at or near zero don't demand exact equality.
  const float relative_error_tolerance = 1e-4;
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t y = 0; y < output_height; y++) {
      for (size_t x = 0; x < output_width; x++) {
        for (size_t g = 0; g < groups; g++) {
          for (size_t c = 0; c < group_output_channels; c++) {
            const size_t idx = (((i * output_height + y) * output_width + x) * groups + g) * group_output_channels + c;
            const float value_ref = output_ref[idx];
            const float value = output[idx];
            if (std::abs(value - value_ref) > std::max(std::abs(value_ref) * relative_error_tolerance, std::numeric_limits<float>::epsilon())) {
              // NOTE(review): std::ostringstream requires <sstream>, which this
              // file does not include directly — presumably pulled in
              // transitively; confirm.
              std::ostringstream error_stream;
              error_stream << "(x, y) = (" << x << ", " << y << "), group = " << g
                << ", channel = " << c << ", refValue = " << value_ref
                << ", actualValue = " << value
                << ", absDiff=" << std::abs(value - value_ref);
              return error_stream.str();
            }
          }
        }
      }
    }
  }
  return "";
}
806
// Benchmarks one F32 convolution (regular or depthwise) through the ARM
// Compute Library NEON backend, then validates its result against the naive
// reference implementation above.
//
// state: supplies the convolution shape via state.range(0..11) in the order
//   N, H, W, KH, KW, PH, PW, S, D, G, GCin, GCout (see the shape functions
//   below) and receives timings, counters, and skip/error status.
// net: network name used for benchmark labeling; unused in the body (kept for
//   signature parity with the other backend entry points in this file).
void armcl_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  // Effective kernel extent accounts for dilation; output size follows the
  // standard floor-division convolution formula. Odd total padding puts the
  // extra pixel on the right/bottom edge (matching the reference checker).
  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  arm_compute::PadStrideInfo pad_stride_info(
    subsampling /* stride height */,
    subsampling /* stride width */,
    padding_left, padding_right, padding_top, padding_bottom,
    arm_compute::DimensionRoundingType::FLOOR);
  arm_compute::Size2D dilation_info(dilation, dilation);
  // Note: activation is disabled by default.
  arm_compute::ActivationLayerInfo activation_info;

  // Note: no batch size and reverse order of dimensions, i.e. CWHN for NHWC.
  arm_compute::TensorShape input_shape(
    /* C */ groups * group_input_channels,
    /* W */ input_width,
    /* H */ input_height,
    /* N */ batch_size);
  arm_compute::TensorInfo input_info(
    input_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  input_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor input_tensor;
  input_tensor.allocator()->init(input_info);
  input_tensor.allocator()->allocate();

  // Note: reverse order of dimensions, i.e. for IWHO for OHWI.
  arm_compute::TensorShape kernel_shape(
    /* I */ groups * group_input_channels,
    /* W */ kernel_width,
    /* H */ kernel_height,
    /* O */ group_output_channels);
  arm_compute::TensorInfo kernel_info(
    kernel_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  kernel_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor kernelTensor;
  kernelTensor.allocator()->init(kernel_info);
  kernelTensor.allocator()->allocate();

  arm_compute::TensorShape bias_shape(groups * group_output_channels);
  arm_compute::TensorInfo bias_info(
    bias_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  bias_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor bias_tensor;
  bias_tensor.allocator()->init(bias_info);
  bias_tensor.allocator()->allocate();

  // Note: no batch size and reverse order of dimensions, i.e. CWHN for NHWC.
  arm_compute::TensorShape output_shape(
    /* C */ groups * group_output_channels,
    /* W */ output_width,
    /* H */ output_height,
    /* N */ batch_size);
  arm_compute::TensorInfo output_info(
    output_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  output_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor output_tensor;
  output_tensor.allocator()->init(output_info);
  output_tensor.allocator()->allocate();

  // Fill every buffer — including the output — with uniform random values in
  // [0, 1). Pre-filling the output presumably guarantees the validation below
  // never compares against uninitialized memory — TODO confirm intent.
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));

  std::generate(
    reinterpret_cast<float*>(input_tensor.buffer()),
    reinterpret_cast<float*>(input_tensor.buffer()) + input_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(kernelTensor.buffer()),
    reinterpret_cast<float*>(kernelTensor.buffer()) + kernel_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(bias_tensor.buffer()),
    reinterpret_cast<float*>(bias_tensor.buffer()) + bias_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(output_tensor.buffer()),
    reinterpret_cast<float*>(output_tensor.buffer()) + output_shape.total_size(),
    std::ref(f32rng));

  bool is_depthwise = false;
  if (groups != 1) {
    // NEConvolutionLayer uses NEGEMMConvolutionLayer by default, which doesn't support grouped convolution.
    // However, depthwise convolution is supported via NEDepthwiseConvolutionLayer.
    if (group_input_channels == 1) {
      is_depthwise = true;
    } else {
      state.SkipWithError("grouped convolution is not supported");
      return;
    }
  }

  // Owning handle for whichever ACL layer type gets picked below.
  std::shared_ptr<arm_compute::IFunction> layer;
  if (is_depthwise) {
    if (dilation != 1) {
      state.SkipWithError("dilated depthwise convolution is not supported");
      return;
    }

    // Avoid NEDepthwiseConvolutionLayer3x3 when stride isn't 2 in order to pass the output verification.
    // TODO(b/130206370) This looks like a bug and needs further investigation.
    if (kernel_height == 3 && kernel_width == 3 && subsampling == 2) {
      // NOTE(review): the argument after pad_stride_info appears to be the
      // depth multiplier (each input channel produces group_output_channels
      // outputs) — confirm against the ACL configure() signature.
      auto* depthwise_3x3_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer3x3();
      layer.reset(depthwise_3x3_convolution_layer);
      depthwise_3x3_convolution_layer->configure(
        &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
        pad_stride_info, group_output_channels, activation_info);

      if (!depthwise_3x3_convolution_layer->validate(
          &input_info, &kernel_info, &bias_info, &output_info,
          pad_stride_info, group_output_channels, activation_info))
      {
        state.SkipWithError("validation failed");
        return;
      }
    } else {
      auto* depthwise_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer();
      layer.reset(depthwise_convolution_layer);
      depthwise_convolution_layer->configure(
        &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
        pad_stride_info, group_output_channels, activation_info);

      if (!depthwise_convolution_layer->validate(
          &input_info, &kernel_info, &bias_info, &output_info,
          pad_stride_info, group_output_channels, activation_info))
      {
        state.SkipWithError("validation failed");
        return;
      }
    }
  } else {
    auto* convolution_layer = new arm_compute::NEConvolutionLayer();
    layer.reset(convolution_layer);
    convolution_layer->configure(
      &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
      pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
      true /* enable fast math */, groups);

    if (!convolution_layer->validate(
        &input_info, &kernel_info, &bias_info, &output_info,
        pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
        true /* enable fast math */, groups))
    {
      state.SkipWithError("validation failed");
      return;
    }
  }

  // Dry run to let ACL do one-time initializations.
  arm_compute::CPPScheduler::get().set_num_threads(1);
  layer->run();

  for (auto _ : state) {
    state.PauseTiming();
    // Evict everything from the cache, then prefetch just the input to L1, so
    // every timed iteration starts from the same cache state.
    benchmark::utils::WipeCache();
    benchmark::utils::PrefetchToL1(
      input_tensor.buffer(),
      batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    layer->run();
  }

  // Validate outputs.
  const std::string error_string = compare_with_convolution_f32_reference_output(
    state, reinterpret_cast<const float*>(input_tensor.buffer()),
    input_shape.total_size(),
    reinterpret_cast<const float*>(kernelTensor.buffer()),
    kernel_shape.total_size(),
    reinterpret_cast<const float*>(bias_tensor.buffer()),
    bias_shape.total_size(),
    reinterpret_cast<const float*>(output_tensor.buffer()),
    output_shape.total_size());

  if (!error_string.empty()) {
    state.SkipWithError(("validation failed: " + error_string).c_str());
    return;
  }

  input_tensor.allocator()->free();
  kernelTensor.allocator()->free();
  bias_tensor.allocator()->free();
  output_tensor.allocator()->free();

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  // 2 ops (multiply + accumulate) per MAC.
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
    batch_size * output_height * output_width *
    groups * group_input_channels * group_output_channels *
    kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}
1027#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
1028
1029// ShuffleNet v1 with 1 group.
1030static void ShuffleNetV1G1(benchmark::internal::Benchmark* b) {
1031 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1032
1033 /*************************** Conv 1 **************************/
1034 /* N H W KH KW PH PW S D G GCin GCout */
1035 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1036 /******************* Stage 2: stride-2 unit ******************/
1037 /* N H W KH KW PH PW S D G GCin GCout */
1038 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 36});
1039 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 36, 1, 1});
1040 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 120});
1041 /******************* Stage 2: stride-1 units *****************/
1042 /* N H W KH KW PH PW S D G GCin GCout */
1043 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 36});
1044 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 36, 1, 1});
1045 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 144});
1046 /******************* Stage 3: stride-2 unit ******************/
1047 /* N H W KH KW PH PW S D G GCin GCout */
1048 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 72});
1049 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 72, 1, 1});
1050 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 144});
1051 /******************* Stage 3: stride-1 units *****************/
1052 /* N H W KH KW PH PW S D G GCin GCout */
1053 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 72});
1054 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 72, 1, 1});
1055 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 288});
1056 /******************* Stage 4: stride-2 unit ******************/
1057 /* N H W KH KW PH PW S D G GCin GCout */
1058 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 144});
1059 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1060 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 288});
1061 /******************* Stage 4: stride-1 units *****************/
1062 /* N H W KH KW PH PW S D G GCin GCout */
1063 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 144});
1064 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1065 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 576});
1066}
1067
1068// ShuffleNet v1 with 2 groups.
1069static void ShuffleNetV1G2(benchmark::internal::Benchmark* b) {
1070 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1071
1072 /*************************** Conv 1 **************************/
1073 /* N H W KH KW PH PW S D G GCin GCout */
1074 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1075 /******************* Stage 2: stride-2 unit ******************/
1076 /* N H W KH KW PH PW S D G GCin GCout */
1077 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 50});
1078 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 50, 1, 1});
1079 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 88});
1080 /******************* Stage 2: stride-1 units *****************/
1081 /* N H W KH KW PH PW S D G GCin GCout */
1082 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 25});
1083 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 50, 1, 1});
1084 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 100});
1085 /******************* Stage 3: stride-2 unit ******************/
1086 /* N H W KH KW PH PW S D G GCin GCout */
1087 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 50});
1088 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 100, 1, 1});
1089 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 100});
1090 /******************* Stage 3: stride-1 units *****************/
1091 /* N H W KH KW PH PW S D G GCin GCout */
1092 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 50});
1093 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 100, 1, 1});
1094 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 200});
1095 /******************* Stage 4: stride-2 unit ******************/
1096 /* N H W KH KW PH PW S D G GCin GCout */
1097 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 100});
1098 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 200, 1, 1});
1099 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 200});
1100 /******************* Stage 4: stride-1 units *****************/
1101 /* N H W KH KW PH PW S D G GCin GCout */
1102 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 400, 100});
1103 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 200, 1, 1});
1104 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 400});
1105}
1106
1107// ShuffleNet v1 with 3 groups.
1108static void ShuffleNetV1G3(benchmark::internal::Benchmark* b) {
1109 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1110
1111 /*************************** Conv 1 **************************/
1112 /* N H W KH KW PH PW S D G GCin GCout */
1113 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1114 /******************* Stage 2: stride-2 unit ******************/
1115 /* N H W KH KW PH PW S D G GCin GCout */
1116 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 60});
1117 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 60, 1, 1});
1118 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 72});
1119 /******************* Stage 2: stride-1 units *****************/
1120 /* N H W KH KW PH PW S D G GCin GCout */
1121 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 20});
1122 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 60, 1, 1});
1123 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 80});
1124 /******************* Stage 3: stride-2 unit ******************/
1125 /* N H W KH KW PH PW S D G GCin GCout */
1126 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 40});
1127 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 120, 1, 1});
1128 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 80});
1129 /******************* Stage 3: stride-1 units *****************/
1130 /* N H W KH KW PH PW S D G GCin GCout */
1131 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 40});
1132 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 120, 1, 1});
1133 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 160});
1134 /******************* Stage 4: stride-2 unit ******************/
1135 /* N H W KH KW PH PW S D G GCin GCout */
1136 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 80});
1137 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1138 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 160});
1139 /******************* Stage 4: stride-1 units *****************/
1140 /* N H W KH KW PH PW S D G GCin GCout */
1141 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 320, 80});
1142 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1143 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 320});
1144}
1145
1146// ShuffleNet v1 with 4 groups.
1147static void ShuffleNetV1G4(benchmark::internal::Benchmark* b) {
1148 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1149
1150 /*************************** Conv 1 **************************/
1151 /* N H W KH KW PH PW S D G GCin GCout */
1152 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1153 /******************* Stage 2: stride-2 unit ******************/
1154 /* N H W KH KW PH PW S D G GCin GCout */
1155 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 68});
1156 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 68, 1, 1});
1157 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 62});
1158 /******************* Stage 2: stride-1 units *****************/
1159 /* N H W KH KW PH PW S D G GCin GCout */
1160 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 17});
1161 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 68, 1, 1});
1162 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 68});
1163 /******************* Stage 3: stride-2 unit ******************/
1164 /* N H W KH KW PH PW S D G GCin GCout */
1165 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 34});
1166 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 136, 1, 1});
1167 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 68});
1168 /******************* Stage 3: stride-1 units *****************/
1169 /* N H W KH KW PH PW S D G GCin GCout */
1170 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 34});
1171 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 136, 1, 1});
1172 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 136});
1173 /******************* Stage 4: stride-2 unit ******************/
1174 /* N H W KH KW PH PW S D G GCin GCout */
1175 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 68});
1176 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 272, 1, 1});
1177 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 136});
1178 /******************* Stage 4: stride-1 units *****************/
1179 /* N H W KH KW PH PW S D G GCin GCout */
1180 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 272, 68});
1181 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 272, 1, 1});
1182 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 272});
1183}
1184
1185// ShuffleNet v1 with 8 groups.
1186static void ShuffleNetV1G8(benchmark::internal::Benchmark* b) {
1187 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1188
1189 /*************************** Conv 1 **************************/
1190 /* N H W KH KW PH PW S D G GCin GCout */
1191 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1192 /******************* Stage 2: stride-2 unit ******************/
1193 /* N H W KH KW PH PW S D G GCin GCout */
1194 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 96});
1195 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1196 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 45});
1197 /******************* Stage 2: stride-1 units *****************/
1198 /* N H W KH KW PH PW S D G GCin GCout */
1199 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 12});
1200 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1201 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 48});
1202 /******************* Stage 3: stride-2 unit ******************/
1203 /* N H W KH KW PH PW S D G GCin GCout */
1204 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 24});
1205 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1206 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 48});
1207 /******************* Stage 3: stride-1 units *****************/
1208 /* N H W KH KW PH PW S D G GCin GCout */
1209 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 24});
1210 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1211 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 96});
1212 /******************* Stage 4: stride-2 unit ******************/
1213 /* N H W KH KW PH PW S D G GCin GCout */
1214 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 48});
1215 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 384, 1, 1});
1216 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 96});
1217 /******************* Stage 4: stride-1 units *****************/
1218 /* N H W KH KW PH PW S D G GCin GCout */
1219 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 192, 48});
1220 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 384, 1, 1});
1221 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 192});
1222}
1223
1224// ShuffleNet v2 (0.5X scale)
1225static void ShuffleNetV2X05(benchmark::internal::Benchmark* b) {
1226 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1227
1228 /*************************** Conv 1 **************************/
1229 /* N H W KH KW PH PW S D G GCin GCout */
1230 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1231 /************************** Stage 2 **************************/
1232 /* N H W KH KW PH PW S D G GCin GCout */
1233 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1234 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1235 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1236 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 24, 1, 1});
1237 /************************** Stage 3 **************************/
1238 /* N H W KH KW PH PW S D G GCin GCout */
1239 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 48, 1, 1});
1240 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1241 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1242 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 48, 1, 1});
1243 /************************** Stage 4 **************************/
1244 /* N H W KH KW PH PW S D G GCin GCout */
1245 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1246 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1247 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1248 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 96, 1, 1});
1249 /*************************** Conv 5 **************************/
1250 /* N H W KH KW PH PW S D G GCin GCout */
1251 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 192, 1024});
1252}
1253
1254// ShuffleNet v2 (1.0X scale)
1255static void ShuffleNetV2X10(benchmark::internal::Benchmark* b) {
1256 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1257
1258 /*************************** Conv 1 **************************/
1259 /* N H W KH KW PH PW S D G GCin GCout */
1260 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1261 /************************** Stage 2 **************************/
1262 /* N H W KH KW PH PW S D G GCin GCout */
1263 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1264 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1265 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1266 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 58, 1, 1});
1267 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 58, 58});
1268 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 58, 1, 1});
1269 /************************** Stage 3 **************************/
1270 /* N H W KH KW PH PW S D G GCin GCout */
1271 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 116, 1, 1});
1272 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1273 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1274 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 116, 1, 1});
1275 /************************** Stage 4 **************************/
1276 /* N H W KH KW PH PW S D G GCin GCout */
1277 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 232, 1, 1});
1278 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1279 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1280 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 232, 1, 1});
1281 /*************************** Conv 5 **************************/
1282 /* N H W KH KW PH PW S D G GCin GCout */
1283 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 464, 1024});
1284}
1285
1286// ShuffleNet v2 (1.5X scale)
1287static void ShuffleNetV2X15(benchmark::internal::Benchmark* b) {
1288 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1289
1290 /*************************** Conv 1 **************************/
1291 /* N H W KH KW PH PW S D G GCin GCout */
1292 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1293 /************************** Stage 2 **************************/
1294 /* N H W KH KW PH PW S D G GCin GCout */
1295 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1296 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1297 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1298 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 88, 1, 1});
1299 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 88});
1300 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
1301 /************************** Stage 3 **************************/
1302 /* N H W KH KW PH PW S D G GCin GCout */
1303 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 176, 1, 1});
1304 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1305 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1306 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 176, 1, 1});
1307 /************************** Stage 4 **************************/
1308 /* N H W KH KW PH PW S D G GCin GCout */
1309 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 352, 1, 1});
1310 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1311 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1312 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 352, 1, 1});
1313 /*************************** Conv 5 **************************/
1314 /* N H W KH KW PH PW S D G GCin GCout */
1315 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 704, 1024});
1316}
1317
1318// ShuffleNet v2 (2.0X scale)
1319static void ShuffleNetV2X20(benchmark::internal::Benchmark* b) {
1320 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1321
1322 /*************************** Conv 1 **************************/
1323 /* N H W KH KW PH PW S D G GCin GCout */
1324 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1325 /************************** Stage 2 **************************/
1326 /* N H W KH KW PH PW S D G GCin GCout */
1327 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1328 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1329 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1330 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 122, 1, 1});
1331 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 122, 122});
1332 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 122, 1, 1});
1333 /************************** Stage 3 **************************/
1334 /* N H W KH KW PH PW S D G GCin GCout */
1335 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 244, 1, 1});
1336 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1337 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1338 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 244, 1, 1});
1339 /************************** Stage 4 **************************/
1340 /* N H W KH KW PH PW S D G GCin GCout */
1341 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 488, 1, 1});
1342 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1343 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1344 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 488, 1, 1});
1345 /*************************** Conv 5 **************************/
1346 /* N H W KH KW PH PW S D G GCin GCout */
1347 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 976, 2048});
1348}
1349
1350static void MobileNetV1(benchmark::internal::Benchmark* b) {
1351 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1352
1353 /* N H W KH KW PH PW S D G GCin GCout */
1354 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1355 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1356 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 64});
1357 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
1358 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 128});
1359 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 128, 1, 1});
1360 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 128, 128});
1361 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 128, 1, 1});
1362 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 256});
1363 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 256, 1, 1});
1364 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1365 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 256, 1, 1});
1366 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 512});
1367 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 512, 1, 1});
1368 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1369 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 512, 1, 1});
1370 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 1024});
1371 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1024, 1, 1});
1372 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 1024, 1024});
1373}
1374
// Convolution shapes of MobileNet v2 inverted-residual bottlenecks: a 1x1
// expansion, a depthwise 3x3 (rows with G == channels, GCin == GCout == 1),
// and a 1x1 projection. Commented-out rows duplicate shapes already
// benchmarked in an earlier bottleneck and are kept only for readability.
static void MobileNetV2(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});

  /************************ Bottleneck 1 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 16});

  /************************ Bottleneck 2 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 96});
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 96, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 96, 24});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 144, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 144, 24});

  /************************ Bottleneck 3 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 144, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 32});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
//b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});

  /************************ Bottleneck 4 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 192, 64});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});

  /************************ Bottleneck 5 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 96});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});

  /************************ Bottleneck 6 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 576, 1, 1});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 160});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});

  /************************ Bottleneck 7 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 320});

  /******************** Pre-pooling Conv2D *********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 320, 1280});
  /******************** Post-pooling Conv2D ********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1000});
}
1459
// Convolution shapes of MobileNet v3 Small. Rows with G == channels and
// GCin == GCout == 1 are depthwise convolutions; the 1x1 rows at H == W == 1
// are presumably the squeeze-and-excitation reduce/expand layers — TODO
// confirm against the model definition. Commented-out rows duplicate shapes
// already benchmarked in an earlier bottleneck.
static void MobileNetV3Small(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
  /*********************** Bottleneck 1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 16, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 16, 8});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 8, 16});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 16});
  /*********************** Bottleneck 2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 72});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 72, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  /*********************** Bottleneck 3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 24});
  /*********************** Bottleneck 4 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 96});
  b->Args({1, 28, 28, 5, 5, 4, 4, 2, 1, 96, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 96, 24});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 96});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 40});
  /*********************** Bottleneck 5 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
  /*********************** Bottleneck 6 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
//b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
  /*********************** Bottleneck 7 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 120, 48});
  /*********************** Bottleneck 8 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 144});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 144, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 40});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 40, 144});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 144, 48});
  /*********************** Bottleneck 9 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 288});
  b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 288, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 288, 72});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 288});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 288, 96});
  /*********************** Bottleneck 10 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
  /*********************** Bottleneck 11 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
//b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
  /************************ Last Stage ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 1024});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1024, 1001});
}
1544
// Convolution shapes of MobileNet v3 Large. Rows with G == channels and
// GCin == GCout == 1 are depthwise convolutions; the 1x1 rows at H == W == 1
// are presumably the squeeze-and-excitation reduce/expand layers — TODO
// confirm against the model definition. Commented-out rows duplicate shapes
// already benchmarked in an earlier bottleneck.
static void MobileNetV3Large(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
  /*********************** Bottleneck 1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 16, 1, 1});
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 16});
  /*********************** Bottleneck 2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 24});
  /*********************** Bottleneck 3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 72, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  /*********************** Bottleneck 4 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 56, 56, 5, 5, 4, 4, 2, 1, 72, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 40});
  /*********************** Bottleneck 5 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
  /*********************** Bottleneck 6 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
//b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
  /*********************** Bottleneck 7 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 240, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 80});
  /*********************** Bottleneck 8 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 200});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 200, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 200, 80});
  /*********************** Bottleneck 9 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
  /********************** Bottleneck 10 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
  /********************** Bottleneck 11 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 480});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 480, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 480, 120});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 480});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 480, 112});
  /********************** Bottleneck 12 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 672, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 672, 168});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 168, 672});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 672, 112});
  /********************** Bottleneck 13 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
  b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 672, 1, 1});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 672, 160});
  /********************** Bottleneck 14 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
  /********************** Bottleneck 15 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
//b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
  /************************ Last Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 1280});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1001});
}
1645
1646// SqueezeNet 1.0
1647static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
1648 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1649
1650 /************************** Conv 1 *************************/
1651 /* N H W KH KW PH PW S D G GCin GCout */
1652 b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 96});
1653 /************************** Fire 2 *************************/
1654 /* N H W KH KW PH PW S D G GCin GCout */
1655 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 96, 16});
1656 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1657 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1658 /************************** Fire 3 *************************/
1659 /* N H W KH KW PH PW S D G GCin GCout */
1660 b->Args({1, 56, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
1661//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1662//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1663 /************************** Fire 4 *************************/
1664 /* N H W KH KW PH PW S D G GCin GCout */
1665 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 32});
1666 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1667 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1668 /************************** Fire 5 *************************/
1669 /* N H W KH KW PH PW S D G GCin GCout */
1670 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
1671 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1672 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1673 /************************** Fire 6 *************************/
1674 /* N H W KH KW PH PW S D G GCin GCout */
1675 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 48});
1676 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1677 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1678 /************************** Fire 7 *************************/
1679 /* N H W KH KW PH PW S D G GCin GCout */
1680 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 48});
1681//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1682//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1683 /************************** Fire 8 *************************/
1684 /* N H W KH KW PH PW S D G GCin GCout */
1685 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1686 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1687 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1688 /************************** Fire 9 *************************/
1689 /* N H W KH KW PH PW S D G GCin GCout */
1690 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
1691 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1692 b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1693 /************************* Conv 10 *************************/
1694 /* N H W KH KW PH PW S D G GCin GCout */
1695 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
1696}
1697
1698// SqueezeNet 1.1
// Convolution shapes of SqueezeNet 1.1 (the cheaper revision: 3x3 stem and
// earlier down-sampling than 1.0, so Fire 4+ run at smaller resolutions).
// Commented-out rows duplicate shapes already benchmarked in a previous
// Fire module.
static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************** Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 64});
  /************************** Fire 2 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 64, 16});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 3 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 4 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 128, 32});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 5 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 6 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 7 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 48});
//b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
//b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 8 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 64});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************** Fire 9 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
//b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
//b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************* Conv 10 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
}
1749
// Convolution shapes of Inception v3 on a 299x299 input. Rows with
// PH == PW == 0 are "valid" (unpadded) convolutions; the 1x7/7x1 and 1x3/3x1
// pairs are the factorized asymmetric convolutions of the Inception blocks.
static void InceptionV3(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 299, 299, 3, 3, 0, 0, 2, 1, 1, 3, 32});
  b->Args({1, 149, 149, 3, 3, 0, 0, 1, 1, 1, 32, 32});
  b->Args({1, 147, 147, 3, 3, 2, 2, 1, 1, 1, 32, 64});
  b->Args({1, 73, 73, 1, 1, 0, 0, 1, 1, 1, 64, 80});
  b->Args({1, 73, 73, 3, 3, 0, 0, 1, 1, 1, 80, 192});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 48});
  b->Args({1, 35, 35, 5, 5, 4, 4, 1, 1, 1, 48, 64});
  b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 64, 96});
  b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 96, 96});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 32});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 48});
  b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 288, 384});
  b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 96, 96});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 192});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 128});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 128});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 128});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 192});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 160});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 160});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 160});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 192});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 192, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 192, 192});
  b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 320});
  b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 192});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 320});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 384});
  b->Args({1, 8, 8, 1, 3, 0, 2, 1, 1, 1, 384, 384});
  b->Args({1, 8, 8, 3, 1, 2, 0, 1, 1, 1, 384, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 448});
  b->Args({1, 8, 8, 3, 3, 2, 2, 1, 1, 1, 448, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 192});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 320});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 448});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 192});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2048, 1001});
}
1799
// Convolution shapes of ResNet-18: the 7x7 stem, one representative 3x3 per
// residual stage, plus the stride-2 convolutions (both 3x3 and the 1x1
// projection shortcut) at each stage transition.
static void ResNet18(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
  /************************ Conv 2.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  /************************ Conv 3.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 64, 128});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
  b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 64, 128});
  /************************ Conv 4.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 128, 256});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 128, 256});
  /************************ Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 256, 512});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 256, 512});
}
1825
// Convolution shapes of ResNet-50: the 7x7 stem plus the 1x1-3x3-1x1
// bottleneck convolutions of each stage, including the stride-2 1x1
// projection shortcuts at stage transitions. Commented-out rows duplicate
// shapes already listed in the same or a previous stage.
static void ResNet50(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
  /************************ Conv 2.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 64});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  /************************ Conv 2.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 64});
//b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  /************************ Conv 3.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 128});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 128, 128});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
  b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 256, 512});
  /************************ Conv 3.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 128});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
  /************************ Conv 4.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 256});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 256, 256});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
  b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 512, 1024});
  /************************ Conv 4.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 256});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
  /************************ Conv 5.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 512});
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 512, 512});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
  b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 1024, 2048});
  /************************ Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 2048, 512});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
}
1877
// Convolution shapes of VGG: one representative convolution per layer, with
// resolution halving between stages.
// NOTE(review): the *.3 layers use 1x1 kernels, which matches VGG-16
// configuration C rather than the more common configuration D (all 3x3) —
// confirm which variant is intended.
static void VGG(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 3, 64});
  /************************* Conv 1.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 64, 64});

  /************************* Conv 2.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 64, 128});
  /************************* Conv 2.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 128, 128});

  /************************* Conv 3.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 128, 256});
  /************************* Conv 3.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  /************************* Conv 3.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 256});

  /************************* Conv 4.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 256, 512});
  /************************* Conv 4.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  /************************* Conv 4.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 512});

  /************************* Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  /************************* Conv 5.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
}
1922
1923// SRCNN (9-1-5)
1924static void SRCNN915(benchmark::internal::Benchmark* b) {
1925 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1926
1927 /* N H W KH KW PH PW S D G GCin GCout */
1928 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1929 b->Args({1, 376, 376, 1, 1, 0, 0, 1, 1, 1, 64, 32});
1930 b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1931}
1932
1933// SRCNN (9-3-5)
1934static void SRCNN935(benchmark::internal::Benchmark* b) {
1935 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1936
1937 /* N H W KH KW PH PW S D G GCin GCout */
1938 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1939 b->Args({1, 376, 376, 3, 3, 0, 0, 1, 1, 1, 64, 32});
1940 b->Args({1, 374, 374, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1941}
1942
1943// SRCNN (9-5-5)
1944static void SRCNN955(benchmark::internal::Benchmark* b) {
1945 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1946
1947 /* N H W KH KW PH PW S D G GCin GCout */
1948 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1949 b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 64, 32});
1950 b->Args({1, 372, 372, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1951}
1952
#ifndef XNN_NO_F16_OPERATORS
  // Registers half-precision (f16) convolution benchmarks, one per model
  // shape table defined above. UseRealTime() makes google-benchmark report
  // wall-clock time instead of per-thread CPU time.
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, vgg, "VGG")->Apply(VGG)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
  BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001977
#ifndef XNN_NO_F32_OPERATORS
// Single-precision (F32) XNNPACK convolution benchmarks over the same model
// suite as the other datatypes, to allow direct datatype-to-datatype
// comparison of identical layer shapes.
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // XNN_NO_F32_OPERATORS
2002
#ifndef XNN_NO_QS8_OPERATORS
// Signed 8-bit quantized (QS8) XNNPACK convolution benchmarks; shape
// generators are shared with the floating-point variants above so results
// are comparable layer-for-layer.
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // XNN_NO_QS8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002027
#ifndef XNN_NO_QU8_OPERATORS
// Unsigned 8-bit quantized (QU8) XNNPACK convolution benchmarks; same model
// suite and shape generators as the F16/F32/QS8 sections above.
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // XNN_NO_QU8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002052
#ifdef BENCHMARK_TENSORFLOW_LITE
// Baseline comparison: the same convolution shapes run through the
// TensorFlow Lite interpreter (see tflite_convolution_f32 earlier in this
// file). Only built when the TFLite dependency is available.
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE
2077
#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
// Baseline comparison: the same shapes run through Arm Compute Library's
// NEON convolution (armcl_convolution_f32). Only built when ACL is available.
// NOTE(review): unlike the suites above, no MobileNet v3 entries are
// registered here — presumably ACL lacked support for those layers when this
// list was written; confirm before adding them.
BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // BENCHMARK_ARM_COMPUTE_LIBRARY
2100
#ifndef XNNPACK_BENCHMARK_NO_MAIN
// Emit Google Benchmark's default main() unless this translation unit is
// linked into a binary that supplies its own entry point.
BENCHMARK_MAIN();
#endif