// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <vector>

#include <cpuinfo.h>
#include <xnnpack.h>

#include <benchmark/benchmark.h>

#include "bench/utils.h"

#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#endif  // BENCHMARK_ARM_COMPUTE_LIBRARY


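// Benchmarks XNNPACK's quantized 8-bit (Q8) NHWC convolution operator.
// The "net" argument is only a display label; the convolution shape comes from
// the 12 benchmark arguments (see the shape generators at the bottom of the file).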
void xnnpack_convolution_q8(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
  auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

  const size_t output_pixel_stride = groups * group_output_channels;
  const size_t input_pixel_stride = groups * group_input_channels;
  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  std::vector<uint8_t> input(batch_size * input_height * input_width * input_pixel_stride);
  std::generate(input.begin(), input.end(), std::ref(u8rng));
  std::vector<uint8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
  std::vector<int32_t> bias(groups * group_output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(s32rng));
  const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;

  xnn_status status = xnn_initialize();
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  if (!cpuinfo_initialize()) {
    state.SkipWithError("cpuinfo initialization failed");
    return;
  }
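  // Size the number of rotating operator/output copies so that the working set
  // (kernel + bias + one output buffer) cannot stay resident in the last-level
  // cache across iterations, approximating cold-cache inference.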
  const size_t num_buffers = 1 +
    benchmark::utils::divideRoundUp<size_t>(cpuinfo_get_max_cache_size(),
      sizeof(uint8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(uint8_t) * output_elements);
  std::vector<uint8_t> output(output_elements * num_buffers);

  std::vector<xnn_operator_t> convolution_operators(num_buffers);
  for (xnn_operator_t& convolution_op : convolution_operators) {
    status = xnn_create_convolution2d_nhwc_q8(
      padding_top, padding_right, padding_bottom, padding_left,
      kernel_height, kernel_width,
      subsampling, subsampling,
      dilation, dilation,
      groups, group_input_channels, group_output_channels,
      input_pixel_stride, output_pixel_stride,
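      // Quantization parameters: input, kernel, and output all use zero point 127
      // and scale 0.5; the output is clamped to the full [0, 255] range.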
      127, 0.5f,
      127, 0.5f,
      kernel.data(), bias.data(),
      127, 0.5f, 0, 255,
      0 /* flags */, &convolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to create QINT8 Convolution operator");
      return;
    }
  }

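  // Set up each operator once, outside the timed loop, binding it to its own
  // slice of the rotating output buffer.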
  for (size_t i = 0; i < convolution_operators.size(); i++) {
    status = xnn_setup_convolution2d_nhwc_q8(
      convolution_operators[i],
      batch_size, input_height, input_width,
      input.data(), output.data() + i * output_elements,
      nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to setup QINT8 Convolution operator");
      return;
    }
  }

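  // Timed loop: prefetch the input and rotate to a different pre-set-up
  // operator while the timer is paused, so only xnn_run_operator is measured.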
  size_t buffer_index = 0;
  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::prefetchToL1(input.data(), input.size() * sizeof(uint8_t));
    buffer_index = (buffer_index + 1) % num_buffers;
    state.ResumeTiming();

    status = xnn_run_operator(convolution_operators[buffer_index],
      nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run QINT8 Convolution operator");
      return;
    }
  }

  for (xnn_operator_t& convolution_op : convolution_operators) {
    status = xnn_delete_operator(convolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to delete QINT8 Convolution operator");
      return;
    }
    convolution_op = nullptr;
  }

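  // Each output element takes KH * KW * GCin multiply-accumulates; every
  // multiply-accumulate counts as two operations.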
  state.counters["OPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * output_height * output_width *
      groups * group_input_channels * group_output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}

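// Benchmarks XNNPACK's single-precision (F32) NHWC convolution operator,
// following the same rotating-buffer pattern as the Q8 variant above.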
void xnnpack_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

  const size_t output_pixel_stride = groups * group_output_channels;
  const size_t input_pixel_stride = groups * group_input_channels;
  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  std::vector<float> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
  std::generate(input.begin(), input.end(), std::ref(f32rng));
  std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
  std::vector<float> bias(groups * group_output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(f32rng));
  const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;

  xnn_status status = xnn_initialize();
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  if (!cpuinfo_initialize()) {
    state.SkipWithError("cpuinfo initialization failed");
    return;
  }
  const size_t num_buffers = 1 +
    benchmark::utils::divideRoundUp<size_t>(cpuinfo_get_max_cache_size(),
      sizeof(float) * (kernel.size() + bias.size() + output_elements));
  std::vector<float> output(output_elements * num_buffers);

  std::vector<xnn_operator_t> convolution_operators(num_buffers);
  for (xnn_operator_t& convolution_op : convolution_operators) {
    status = xnn_create_convolution2d_nhwc_f32(
      padding_top, padding_right, padding_bottom, padding_left,
      kernel_height, kernel_width,
      subsampling, subsampling,
      dilation, dilation,
      groups, group_input_channels, group_output_channels,
      input_pixel_stride, output_pixel_stride,
      kernel.data(), bias.data(),
      -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
      0 /* flags */, &convolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to create FP32 Convolution operator");
      return;
    }
  }

  for (size_t i = 0; i < convolution_operators.size(); i++) {
    status = xnn_setup_convolution2d_nhwc_f32(
      convolution_operators[i],
      batch_size, input_height, input_width,
      input.data(), output.data() + i * output_elements,
      nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to setup FP32 Convolution operator");
      return;
    }
  }

  size_t buffer_index = 0;
  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::prefetchToL1(input.data(), input.size() * sizeof(float));
    buffer_index = (buffer_index + 1) % num_buffers;
    state.ResumeTiming();

    status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run FP32 Convolution operator");
      return;
    }
  }

  for (xnn_operator_t& convolution_op : convolution_operators) {
    status = xnn_delete_operator(convolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to delete FP32 Convolution operator");
      return;
    }
    convolution_op = nullptr;
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * output_height * output_width *
      groups * group_input_channels * group_output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}

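// Registration sketch (assumed, not part of this file): entry points like the
// two above are typically wired to Google Benchmark with BENCHMARK_CAPTURE,
// where the captured string is only a display label and an Apply() callback
// such as MobileNetV1 (defined later in this file) supplies the twelve
// Args() values read back via state.range(0)..state.range(11), e.g.:
//
//   BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v1, "MobileNet v1")
//       ->Apply(MobileNetV1)->UseRealTime();
//
// (Kept as a comment here because the shape generators are declared below.)
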
#ifdef BENCHMARK_TENSORFLOW_LITE
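// Benchmarks the same convolution shapes through the TensorFlow Lite
// interpreter, building a one-operator model in memory.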
void tflite_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  bool is_depthwise = false;
  if (groups != 1) {
    if (group_input_channels == 1) {
      is_depthwise = true;
    } else {
      state.SkipWithError("grouped convolution is not supported");
      return;
    }
  }

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;

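  // TFLite only expresses padding as SAME or VALID; map the explicit padding
  // amounts onto one of these, or skip shapes that fit neither.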
  tflite::Padding padding = tflite::Padding_VALID;
  if (padding_width == (effective_kernel_width - 1) && padding_height == (effective_kernel_height - 1)) {
    padding = tflite::Padding_SAME;
  } else if (padding_width == 0 && padding_height == 0) {
    padding = tflite::Padding_VALID;
  } else {
    state.SkipWithError("unsupported padding");
    return;
  }

  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
  std::vector<float> bias(groups * group_output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(f32rng));

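  // Assemble a FlatBuffer model containing a single CONV_2D (or
  // DEPTHWISE_CONV_2D) operator, with the kernel and bias as constant buffers.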
  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(
      builder,
      is_depthwise ? tflite::BuiltinOperator_DEPTHWISE_CONV_2D : tflite::BuiltinOperator_CONV_2D,
      0);

  flatbuffers::Offset<tflite::Conv2DOptions> conv2d_options = CreateConv2DOptions(
    builder,
    padding,
    static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
    tflite::ActivationFunctionType_NONE,
    static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));

  flatbuffers::Offset<tflite::DepthwiseConv2DOptions> dwconv2d_options = CreateDepthwiseConv2DOptions(
    builder,
    padding,
    static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
    static_cast<int32_t>(group_output_channels),
    tflite::ActivationFunctionType_NONE,
    static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));

  flatbuffers::Offset<tflite::Buffer> buffers[3] = {
    tflite::CreateBuffer(builder, builder.CreateVector({})),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(kernel.data()),
      sizeof(float) * kernel.size())),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(bias.data()),
      sizeof(float) * bias.size())),
  };

  const int32_t input_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(input_height),
    static_cast<int32_t>(input_width),
    static_cast<int32_t>(groups * group_input_channels)
  };
  const int32_t output_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(output_height),
    static_cast<int32_t>(output_width),
    static_cast<int32_t>(groups * group_output_channels)
  };
  const int32_t filter_shape[4] = {
    static_cast<int32_t>(group_output_channels),
    static_cast<int32_t>(kernel_height),
    static_cast<int32_t>(kernel_width),
    static_cast<int32_t>(groups * group_input_channels)
  };
  const int32_t bias_shape[1] = {
    static_cast<int32_t>(groups * group_output_channels)
  };

  flatbuffers::Offset<tflite::Tensor> tensors[4] = {
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(input_shape, 4),
      tflite::TensorType_FLOAT32,
      0 /* buffer id */,
      builder.CreateString("input")),
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(filter_shape, 4),
      tflite::TensorType_FLOAT32,
      1 /* buffer id */,
      builder.CreateString("filter")),
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(bias_shape, 1),
      tflite::TensorType_FLOAT32,
      2 /* buffer id */,
      builder.CreateString("bias")),
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(output_shape, 4),
      tflite::TensorType_FLOAT32,
      0 /* buffer id */,
      builder.CreateString("output")),
  };

  const int32_t op_inputs[3] = { 0, 1, 2 };
  const int32_t op_outputs[1] = { 3 };
  flatbuffers::Offset<tflite::Operator> op = CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs, 3),
    builder.CreateVector<int32_t>(op_outputs, 1),
    is_depthwise ? tflite::BuiltinOptions_DepthwiseConv2DOptions : tflite::BuiltinOptions_Conv2DOptions,
    is_depthwise ? dwconv2d_options.Union() : conv2d_options.Union(),
    /* custom_options */ 0,
    tflite::CustomOptionsFormat_FLEXBUFFERS);

  const int32_t graph_inputs[1] = { 0 };
  const int32_t graph_outputs[1] = { 3 };
  flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
    builder,
    builder.CreateVector(tensors, 4),
    builder.CreateVector<int32_t>(graph_inputs, 1),
    builder.CreateVector<int32_t>(graph_outputs, 1),
    builder.CreateVector(&op, 1),
    builder.CreateString("Conv2D subgraph"));

  flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("Conv2D model");

  flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    description,
    builder.CreateVector(buffers, 3));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder interpreter_builder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreter_builder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size * groups * group_input_channels * input_height * input_width,
    std::ref(f32rng));

  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::wipeCache();
    benchmark::utils::prefetchToL1(
      interpreter->typed_tensor<float>(0),
      batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * output_height * output_width *
      groups * group_input_channels * group_output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
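// Computes a reference NHWC convolution directly and compares it against the
// produced output; returns an empty string on success or a description of the
// first mismatch.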
static std::string compare_with_convolution_f32_reference_output(
  const benchmark::State& state, const float* input, size_t input_size,
  const float* kernel, size_t kernel_size, const float* bias, size_t bias_size,
  const float* output, size_t output_size)
{
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
  const size_t input_pixel_stride = groups * group_input_channels;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;

  assert(input_size == batch_size * input_height * input_width * groups * group_input_channels);
  assert(kernel_size == group_output_channels * kernel_height * kernel_width * groups * group_input_channels);
  assert(bias_size == groups * group_output_channels);
  assert(output_size == batch_size * output_height * output_width * groups * group_output_channels);

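  // Initialize the reference output with the bias, then accumulate
  // input * kernel products over the receptive field of each output element.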
  std::vector<float> output_ref(output_size);
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t g = 0; g < groups; g++) {
          for (size_t oc = 0; oc < group_output_channels; oc++) {
            output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] =
              bias[g * group_output_channels + oc];
          }
        }
      }
    }
  }
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t ky = 0; ky < kernel_height; ky++) {
          const size_t iy = oy * subsampling + ky * dilation - padding_top;
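          // iy is unsigned: rows that would be negative wrap around to huge
          // values and are rejected by the bounds check below (same for ix).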
          if (iy < input_height) {
            for (size_t kx = 0; kx < kernel_width; kx++) {
              const size_t ix = ox * subsampling + kx * dilation - padding_left;
              if (ix < input_width) {
                for (size_t g = 0; g < groups; g++) {
                  for (size_t oc = 0; oc < group_output_channels; oc++) {
                    for (size_t ic = 0; ic < group_input_channels; ic++) {
                      output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
                        input[((i * input_height + iy) * input_width + ix) * input_pixel_stride + g * group_input_channels + ic] *
                        kernel[(((oc * kernel_height + ky) * kernel_width + kx) * groups + g) * group_input_channels + ic];
                    }  // group_input_channels loop
                  }  // group_output_channels loop
                }  // groups loop
              }
            }  // kernel_width loop
          }
        }  // kernel_height loop
      }  // output_width loop
    }  // output_height loop
  }  // batch_size loop

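  // Accept results within 1e-4 relative error, or within one float epsilon in
  // absolute terms for reference values near zero.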
  const float relative_error_tolerance = 1e-4;
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t y = 0; y < output_height; y++) {
      for (size_t x = 0; x < output_width; x++) {
        for (size_t g = 0; g < groups; g++) {
          for (size_t c = 0; c < group_output_channels; c++) {
            const size_t idx = (((i * output_height + y) * output_width + x) * groups + g) * group_output_channels + c;
            const float value_ref = output_ref[idx];
            const float value = output[idx];
            if (std::abs(value - value_ref) > std::max(std::abs(value_ref) * relative_error_tolerance, std::numeric_limits<float>::epsilon())) {
              std::ostringstream error_stream;
              error_stream << "(x, y) = (" << x << ", " << y << "), group = " << g
                << ", channel = " << c << ", refValue = " << value_ref
                << ", actualValue = " << value
                << ", absDiff=" << std::abs(value - value_ref);
              return error_stream.str();
            }
          }
        }
      }
    }
  }
  return "";
}

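// Benchmarks the same convolution shapes through Arm Compute Library's NEON
// convolution layers, then validates the output against the reference above.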
void armcl_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  arm_compute::PadStrideInfo pad_stride_info(
    subsampling /* stride height */,
    subsampling /* stride width */,
    padding_left, padding_right, padding_top, padding_bottom,
    arm_compute::DimensionRoundingType::FLOOR);
  arm_compute::Size2D dilation_info(dilation, dilation);
  // Note: activation is disabled by default.
  arm_compute::ActivationLayerInfo activation_info;

  // Note: TensorShape lists dimensions in reverse order relative to NHWC, i.e. CWHN.
  arm_compute::TensorShape input_shape(
    /* C */ groups * group_input_channels,
    /* W */ input_width,
    /* H */ input_height,
    /* N */ batch_size);
  arm_compute::TensorInfo input_info(
    input_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  input_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor input_tensor;
  input_tensor.allocator()->init(input_info);
  input_tensor.allocator()->allocate();

  // Note: reverse order of dimensions, i.e. IWHO for OHWI.
  arm_compute::TensorShape kernel_shape(
    /* I */ groups * group_input_channels,
    /* W */ kernel_width,
    /* H */ kernel_height,
    /* O */ group_output_channels);
  arm_compute::TensorInfo kernel_info(
    kernel_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  kernel_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor kernelTensor;
  kernelTensor.allocator()->init(kernel_info);
  kernelTensor.allocator()->allocate();

  arm_compute::TensorShape bias_shape(groups * group_output_channels);
  arm_compute::TensorInfo bias_info(
    bias_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  bias_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor bias_tensor;
  bias_tensor.allocator()->init(bias_info);
  bias_tensor.allocator()->allocate();

  // Note: TensorShape lists dimensions in reverse order relative to NHWC, i.e. CWHN.
  arm_compute::TensorShape output_shape(
    /* C */ groups * group_output_channels,
    /* W */ output_width,
    /* H */ output_height,
    /* N */ batch_size);
  arm_compute::TensorInfo output_info(
    output_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  output_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor output_tensor;
  output_tensor.allocator()->init(output_info);
  output_tensor.allocator()->allocate();

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

  std::generate(
    reinterpret_cast<float*>(input_tensor.buffer()),
    reinterpret_cast<float*>(input_tensor.buffer()) + input_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(kernelTensor.buffer()),
    reinterpret_cast<float*>(kernelTensor.buffer()) + kernel_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(bias_tensor.buffer()),
    reinterpret_cast<float*>(bias_tensor.buffer()) + bias_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(output_tensor.buffer()),
    reinterpret_cast<float*>(output_tensor.buffer()) + output_shape.total_size(),
    std::ref(f32rng));

  bool is_depthwise = false;
  if (groups != 1) {
    // NEConvolutionLayer uses NEGEMMConvolutionLayer by default, which doesn't support grouped convolution.
    // However, depthwise convolution is supported via NEDepthwiseConvolutionLayer.
    if (group_input_channels == 1) {
      is_depthwise = true;
    } else {
      state.SkipWithError("grouped convolution is not supported");
      return;
    }
  }

  std::shared_ptr<arm_compute::IFunction> layer;
  if (is_depthwise) {
    if (dilation != 1) {
      state.SkipWithError("dilated depthwise convolution is not supported");
      return;
    }

    // Avoid NEDepthwiseConvolutionLayer3x3 when stride isn't 2 in order to pass the output verification.
    // TODO(b/130206370) This looks like a bug and needs further investigation.
    if (kernel_height == 3 && kernel_width == 3 && subsampling == 2) {
      auto* depthwise_3x3_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer3x3();
      layer.reset(depthwise_3x3_convolution_layer);
      depthwise_3x3_convolution_layer->configure(
        &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
        pad_stride_info, group_output_channels, activation_info);

      if (!depthwise_3x3_convolution_layer->validate(
            &input_info, &kernel_info, &bias_info, &output_info,
            pad_stride_info, group_output_channels, activation_info))
      {
        state.SkipWithError("validation failed");
        return;
      }
    } else {
      auto* depthwise_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer();
      layer.reset(depthwise_convolution_layer);
      depthwise_convolution_layer->configure(
        &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
        pad_stride_info, group_output_channels, activation_info);

      if (!depthwise_convolution_layer->validate(
            &input_info, &kernel_info, &bias_info, &output_info,
            pad_stride_info, group_output_channels, activation_info))
      {
        state.SkipWithError("validation failed");
        return;
      }
    }
  } else {
    auto* convolution_layer = new arm_compute::NEConvolutionLayer();
    layer.reset(convolution_layer);
    convolution_layer->configure(
      &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
      pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
      true /* enable fast math */, groups);

    if (!convolution_layer->validate(
          &input_info, &kernel_info, &bias_info, &output_info,
          pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
          true /* enable fast math */, groups))
    {
      state.SkipWithError("validation failed");
      return;
    }
  }

  // Dry run to let ACL do one-time initializations.
  arm_compute::CPPScheduler::get().set_num_threads(1);
  layer->run();

  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::wipeCache();
    benchmark::utils::prefetchToL1(
      input_tensor.buffer(),
      batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    layer->run();
  }

  // Validate outputs.
  const std::string error_string = compare_with_convolution_f32_reference_output(
    state, reinterpret_cast<const float*>(input_tensor.buffer()),
    input_shape.total_size(),
    reinterpret_cast<const float*>(kernelTensor.buffer()),
    kernel_shape.total_size(),
    reinterpret_cast<const float*>(bias_tensor.buffer()),
    bias_shape.total_size(),
    reinterpret_cast<const float*>(output_tensor.buffer()),
    output_shape.total_size());

  if (!error_string.empty()) {
    state.SkipWithError(("validation failed: " + error_string).c_str());
    return;
  }

  input_tensor.allocator()->free();
  kernelTensor.allocator()->free();
  bias_tensor.allocator()->free();
  output_tensor.allocator()->free();

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * output_height * output_width *
      groups * group_input_channels * group_output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}
#endif  // BENCHMARK_ARM_COMPUTE_LIBRARY

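// Each Args() row below is one convolution shape: batch N, input H x W,
// kernel KH x KW, total padding PH x PW, stride S, dilation D, groups G, and
// per-group input/output channels GCin/GCout. Depthwise layers appear as
// G = channels with GCin = GCout = 1.
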
806// ShuffleNet v1 with 1 group.
807static void ShuffleNetV1G1(benchmark::internal::Benchmark* b) {
808 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
809
810 /*************************** Conv 1 **************************/
811 /* N H W KH KW PH PW S D G GCin GCout */
812 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
813 /******************* Stage 2: stride-2 unit ******************/
814 /* N H W KH KW PH PW S D G GCin GCout */
815 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 36});
816 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 36, 1, 1});
817 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 120});
818 /******************* Stage 2: stride-1 units *****************/
819 /* N H W KH KW PH PW S D G GCin GCout */
820 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 36});
821 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 36, 1, 1});
822 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 144});
823 /******************* Stage 3: stride-2 unit ******************/
824 /* N H W KH KW PH PW S D G GCin GCout */
825 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 72});
826 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 72, 1, 1});
827 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 144});
828 /******************* Stage 3: stride-1 units *****************/
829 /* N H W KH KW PH PW S D G GCin GCout */
830 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 72});
831 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 72, 1, 1});
832 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 288});
833 /******************* Stage 4: stride-2 unit ******************/
834 /* N H W KH KW PH PW S D G GCin GCout */
835 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 144});
836 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 144, 1, 1});
837 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 288});
838 /******************* Stage 4: stride-1 units *****************/
839 /* N H W KH KW PH PW S D G GCin GCout */
840 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 144});
841 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 144, 1, 1});
842 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 576});
843}
844
845// ShuffleNet v1 with 2 groups.
846static void ShuffleNetV1G2(benchmark::internal::Benchmark* b) {
847 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
848
849 /*************************** Conv 1 **************************/
850 /* N H W KH KW PH PW S D G GCin GCout */
851 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
852 /******************* Stage 2: stride-2 unit ******************/
853 /* N H W KH KW PH PW S D G GCin GCout */
854 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 50});
855 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 50, 1, 1});
856 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 88});
857 /******************* Stage 2: stride-1 units *****************/
858 /* N H W KH KW PH PW S D G GCin GCout */
859 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 25});
860 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 50, 1, 1});
861 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 100});
862 /******************* Stage 3: stride-2 unit ******************/
863 /* N H W KH KW PH PW S D G GCin GCout */
864 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 50});
865 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 100, 1, 1});
866 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 100});
867 /******************* Stage 3: stride-1 units *****************/
868 /* N H W KH KW PH PW S D G GCin GCout */
869 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 50});
870 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 100, 1, 1});
871 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 200});
872 /******************* Stage 4: stride-2 unit ******************/
873 /* N H W KH KW PH PW S D G GCin GCout */
874 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 100});
875 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 200, 1, 1});
876 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 200});
877 /******************* Stage 4: stride-1 units *****************/
878 /* N H W KH KW PH PW S D G GCin GCout */
879 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 400, 100});
880 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 200, 1, 1});
881 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 400});
882}
883
884// ShuffleNet v1 with 3 groups.
885static void ShuffleNetV1G3(benchmark::internal::Benchmark* b) {
886 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
887
888 /*************************** Conv 1 **************************/
889 /* N H W KH KW PH PW S D G GCin GCout */
890 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
891 /******************* Stage 2: stride-2 unit ******************/
892 /* N H W KH KW PH PW S D G GCin GCout */
893 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 60});
894 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 60, 1, 1});
895 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 72});
896 /******************* Stage 2: stride-1 units *****************/
897 /* N H W KH KW PH PW S D G GCin GCout */
898 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 20});
899 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 60, 1, 1});
900 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 80});
901 /******************* Stage 3: stride-2 unit ******************/
902 /* N H W KH KW PH PW S D G GCin GCout */
903 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 40});
904 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 120, 1, 1});
905 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 80});
906 /******************* Stage 3: stride-1 units *****************/
907 /* N H W KH KW PH PW S D G GCin GCout */
908 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 40});
909 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 120, 1, 1});
910 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 160});
911 /******************* Stage 4: stride-2 unit ******************/
912 /* N H W KH KW PH PW S D G GCin GCout */
913 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 80});
914 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 240, 1, 1});
915 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 160});
916 /******************* Stage 4: stride-1 units *****************/
917 /* N H W KH KW PH PW S D G GCin GCout */
918 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 320, 80});
919 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 240, 1, 1});
920 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 320});
921}
922
923// ShuffleNet v1 with 4 groups.
924static void ShuffleNetV1G4(benchmark::internal::Benchmark* b) {
925 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
926
927 /*************************** Conv 1 **************************/
928 /* N H W KH KW PH PW S D G GCin GCout */
929 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
930 /******************* Stage 2: stride-2 unit ******************/
931 /* N H W KH KW PH PW S D G GCin GCout */
932 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 68});
933 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 68, 1, 1});
934 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 62});
935 /******************* Stage 2: stride-1 units *****************/
936 /* N H W KH KW PH PW S D G GCin GCout */
937 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 17});
938 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 68, 1, 1});
939 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 68});
940 /******************* Stage 3: stride-2 unit ******************/
941 /* N H W KH KW PH PW S D G GCin GCout */
942 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 34});
943 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 136, 1, 1});
944 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 68});
945 /******************* Stage 3: stride-1 units *****************/
946 /* N H W KH KW PH PW S D G GCin GCout */
947 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 34});
948 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 136, 1, 1});
949 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 136});
950 /******************* Stage 4: stride-2 unit ******************/
951 /* N H W KH KW PH PW S D G GCin GCout */
952 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 68});
953 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 272, 1, 1});
954 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 136});
955 /******************* Stage 4: stride-1 units *****************/
956 /* N H W KH KW PH PW S D G GCin GCout */
957 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 272, 68});
958 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 272, 1, 1});
959 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 272});
960}
961
962// ShuffleNet v1 with 8 groups.
963static void ShuffleNetV1G8(benchmark::internal::Benchmark* b) {
964 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
965
966 /*************************** Conv 1 **************************/
967 /* N H W KH KW PH PW S D G GCin GCout */
968 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
969 /******************* Stage 2: stride-2 unit ******************/
970 /* N H W KH KW PH PW S D G GCin GCout */
971 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 96});
972 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 96, 1, 1});
973 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 45});
974 /******************* Stage 2: stride-1 units *****************/
975 /* N H W KH KW PH PW S D G GCin GCout */
976 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 12});
977 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 96, 1, 1});
978 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 48});
979 /******************* Stage 3: stride-2 unit ******************/
980 /* N H W KH KW PH PW S D G GCin GCout */
981 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 24});
982 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
983 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 48});
984 /******************* Stage 3: stride-1 units *****************/
985 /* N H W KH KW PH PW S D G GCin GCout */
986 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 24});
987 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 192, 1, 1});
988 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 96});
989 /******************* Stage 4: stride-2 unit ******************/
990 /* N H W KH KW PH PW S D G GCin GCout */
991 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 48});
992 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 384, 1, 1});
993 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 96});
994 /******************* Stage 4: stride-1 units *****************/
995 /* N H W KH KW PH PW S D G GCin GCout */
996 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 192, 48});
997 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 384, 1, 1});
998 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 192});
999}
1000
1001// ShuffleNet v2 (0.5X scale)
1002static void ShuffleNetV2X05(benchmark::internal::Benchmark* b) {
1003 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1004
1005 /*************************** Conv 1 **************************/
1006 /* N H W KH KW PH PW S D G GCin GCout */
1007 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1008 /************************** Stage 2 **************************/
1009 /* N H W KH KW PH PW S D G GCin GCout */
1010 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1011 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1012 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1013 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 24, 1, 1});
1014 /************************** Stage 3 **************************/
1015 /* N H W KH KW PH PW S D G GCin GCout */
1016 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 48, 1, 1});
1017 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1018 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1019 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 48, 1, 1});
1020 /************************** Stage 4 **************************/
1021 /* N H W KH KW PH PW S D G GCin GCout */
1022 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1023 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1024 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1025 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 96, 1, 1});
1026 /*************************** Conv 5 **************************/
1027 /* N H W KH KW PH PW S D G GCin GCout */
1028 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 192, 1024});
1029}
1030
1031// ShuffleNet v2 (1.0X scale)
1032static void ShuffleNetV2X10(benchmark::internal::Benchmark* b) {
1033 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1034
1035 /*************************** Conv 1 **************************/
1036 /* N H W KH KW PH PW S D G GCin GCout */
1037 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1038 /************************** Stage 2 **************************/
1039 /* N H W KH KW PH PW S D G GCin GCout */
1040 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1041 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1042 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1043 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 58, 1, 1});
1044 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 58, 58});
1045 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 58, 1, 1});
1046 /************************** Stage 3 **************************/
1047 /* N H W KH KW PH PW S D G GCin GCout */
1048 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 116, 1, 1});
1049 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1050 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1051 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 116, 1, 1});
1052 /************************** Stage 4 **************************/
1053 /* N H W KH KW PH PW S D G GCin GCout */
1054 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 232, 1, 1});
1055 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1056 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1057 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 232, 1, 1});
1058 /*************************** Conv 5 **************************/
1059 /* N H W KH KW PH PW S D G GCin GCout */
1060 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 464, 1024});
1061}
1062
1063// ShuffleNet v2 (1.5X scale)
1064static void ShuffleNetV2X15(benchmark::internal::Benchmark* b) {
1065 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1066
1067 /*************************** Conv 1 **************************/
1068 /* N H W KH KW PH PW S D G GCin GCout */
1069 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1070 /************************** Stage 2 **************************/
1071 /* N H W KH KW PH PW S D G GCin GCout */
1072 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1073 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1074 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1075 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 88, 1, 1});
1076 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 88});
1077 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
1078 /************************** Stage 3 **************************/
1079 /* N H W KH KW PH PW S D G GCin GCout */
1080 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 176, 1, 1});
1081 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1082 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1083 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 176, 1, 1});
1084 /************************** Stage 4 **************************/
1085 /* N H W KH KW PH PW S D G GCin GCout */
1086 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 352, 1, 1});
1087 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1088 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1089 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 352, 1, 1});
1090 /*************************** Conv 5 **************************/
1091 /* N H W KH KW PH PW S D G GCin GCout */
1092 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 704, 1024});
1093}
1094
1095// ShuffleNet v2 (2.0X scale)
1096static void ShuffleNetV2X20(benchmark::internal::Benchmark* b) {
1097 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1098
1099 /*************************** Conv 1 **************************/
1100 /* N H W KH KW PH PW S D G GCin GCout */
1101 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1102 /************************** Stage 2 **************************/
1103 /* N H W KH KW PH PW S D G GCin GCout */
1104 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1105 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1106 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1107 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 122, 1, 1});
1108 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 122, 122});
1109 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 122, 1, 1});
1110 /************************** Stage 3 **************************/
1111 /* N H W KH KW PH PW S D G GCin GCout */
1112 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 244, 1, 1});
1113 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1114 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1115 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 244, 1, 1});
1116 /************************** Stage 4 **************************/
1117 /* N H W KH KW PH PW S D G GCin GCout */
1118 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 488, 1, 1});
1119 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1120 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1121 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 488, 1, 1});
1122 /*************************** Conv 5 **************************/
1123 /* N H W KH KW PH PW S D G GCin GCout */
1124 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 976, 2048});
1125}
1126
1127static void MobileNetV1(benchmark::internal::Benchmark* b) {
1128 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1129
1130 /* N H W KH KW PH PW S D G GCin GCout */
1131 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1132 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1133 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 64});
1134 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
1135 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 128});
1136 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 128, 1, 1});
1137 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 128, 128});
1138 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 128, 1, 1});
1139 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 256});
1140 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 256, 1, 1});
1141 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1142 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 256, 1, 1});
1143 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 512});
1144 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 512, 1, 1});
1145 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1146 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 512, 1, 1});
1147 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 1024});
1148 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1024, 1, 1});
1149 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 1024, 1024});
1150}
1151
1152static void MobileNetV2(benchmark::internal::Benchmark* b) {
1153 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1154
1155 /* N H W KH KW PH PW S D G GCin GCout */
1156 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1157
1158 /************************ Bottleneck 1 ***********************/
1159 /* N H W KH KW PH PW S D G GCin GCout */
1160 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1161 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 16});
1162
1163 /************************ Bottleneck 2 ***********************/
1164 /* N H W KH KW PH PW S D G GCin GCout */
1165 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 96});
1166 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1167 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 96, 24});
1168 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1169 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 144, 1, 1});
1170 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 144, 24});
1171
1172 /************************ Bottleneck 3 ***********************/
1173 /* N H W KH KW PH PW S D G GCin GCout */
1174//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1175 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1176 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 32});
1177 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1178 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1179 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1180//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1181//b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1182//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1183
1184 /************************ Bottleneck 4 ***********************/
1185 /* N H W KH KW PH PW S D G GCin GCout */
1186//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1187 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1188 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 192, 64});
1189 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1190 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1191 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1192//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1193//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1194//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1195//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1196//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1197//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1198
1199 /************************ Bottleneck 5 ***********************/
1200 /* N H W KH KW PH PW S D G GCin GCout */
1201//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1202//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1203 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 96});
1204 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1205 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1206 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1207//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1208//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1209//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1210
1211 /************************ Bottleneck 6 ***********************/
1212 /* N H W KH KW PH PW S D G GCin GCout */
1213//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1214 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 576, 1, 1});
1215 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 160});
1216 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1217 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1218 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1219//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1220//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1221//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1222
1223 /************************ Bottleneck 7 ***********************/
1224 /* N H W KH KW PH PW S D G GCin GCout */
1225//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1226//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1227 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 320});
1228
1229 /******************** Pre-pooling Conv2D *********************/
1230 /* N H W KH KW PH PW S D G GCin GCout */
1231 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 320, 1280});
1232 /******************** Post-pooling Conv2D ********************/
1233 /* N H W KH KW PH PW S D G GCin GCout */
1234 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1000});
1235}
1236
static void MobileNetV3Small(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
  /*********************** Bottleneck 1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 16, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 16, 8});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 8, 16});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 16});
  /*********************** Bottleneck 2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 72});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 72, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  /*********************** Bottleneck 3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 24});
  /*********************** Bottleneck 4 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 96});
  b->Args({1, 28, 28, 5, 5, 4, 4, 2, 1, 96, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 96, 24});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 96});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 40});
  /*********************** Bottleneck 5 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
  /*********************** Bottleneck 6 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
//b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
  /*********************** Bottleneck 7 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 120, 48});
  /*********************** Bottleneck 8 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 144});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 144, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 40});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 40, 144});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 144, 48});
  /*********************** Bottleneck 9 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 288});
  b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 288, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 288, 72});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 288});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 288, 96});
  /*********************** Bottleneck 10 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
  /*********************** Bottleneck 11 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
//b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
  /************************ Last Stage ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 1024});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1024, 1001});
}

static void MobileNetV3Large(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
  /*********************** Bottleneck 1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 16, 1, 1});
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 16});
  /*********************** Bottleneck 2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 24});
  /*********************** Bottleneck 3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 72, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  /*********************** Bottleneck 4 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 56, 56, 5, 5, 4, 4, 2, 1, 72, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 40});
  /*********************** Bottleneck 5 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
  /*********************** Bottleneck 6 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
//b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
  /*********************** Bottleneck 7 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 240, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 80});
  /*********************** Bottleneck 8 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 200});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 200, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 200, 80});
  /*********************** Bottleneck 9 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
  /********************** Bottleneck 10 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
  /********************** Bottleneck 11 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 480});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 480, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 480, 120});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 480});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 480, 112});
  /********************** Bottleneck 12 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 672, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 672, 168});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 168, 672});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 672, 112});
  /********************** Bottleneck 13 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
  b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 672, 1, 1});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 672, 160});
  /********************** Bottleneck 14 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
  /********************** Bottleneck 15 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
//b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
  /************************ Last Stage ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 1280});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1001});
}

// SqueezeNet 1.0
static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************** Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 96});
  /************************** Fire 2 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 96, 16});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 3 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 4 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 32});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 5 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 6 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 7 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 48});
//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 8 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 64});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************** Fire 9 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************* Conv 10 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
}

// SqueezeNet 1.1
static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************** Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 64});
  /************************** Fire 2 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 64, 16});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 3 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 4 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 128, 32});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 5 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 6 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 7 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 48});
//b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
//b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 8 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 64});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************** Fire 9 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
//b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
//b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************* Conv 10 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
}

static void InceptionV3(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 299, 299, 3, 3, 0, 0, 2, 1, 1, 3, 32});
  b->Args({1, 149, 149, 3, 3, 0, 0, 1, 1, 1, 32, 32});
  b->Args({1, 147, 147, 3, 3, 2, 2, 1, 1, 1, 32, 64});
  b->Args({1, 73, 73, 1, 1, 0, 0, 1, 1, 1, 64, 80});
  b->Args({1, 73, 73, 3, 3, 0, 0, 1, 1, 1, 80, 192});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 48});
  b->Args({1, 35, 35, 5, 5, 4, 4, 1, 1, 1, 48, 64});
  b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 64, 96});
  b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 96, 96});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 32});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 48});
  b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 288, 384});
  b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 96, 96});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 192});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 128});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 128});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 128});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 192});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 160});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 160});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 160});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 192});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 192, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 192, 192});
  b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 320});
  b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 192});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 320});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 384});
  b->Args({1, 8, 8, 1, 3, 0, 2, 1, 1, 1, 384, 384});
  b->Args({1, 8, 8, 3, 1, 2, 0, 1, 1, 1, 384, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 448});
  b->Args({1, 8, 8, 3, 3, 2, 2, 1, 1, 1, 448, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 192});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 320});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 448});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 192});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2048, 1001});
}

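// In the two ResNet generators below, the stride-2 1x1 shapes at the end of
// the Conv 3.X/4.X/5.X groups are the projection shortcuts that downsample
// the residual connection alongside the main branch.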
static void ResNet18(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
  /************************ Conv 2.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  /************************ Conv 3.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 64, 128});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
  b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 64, 128});
  /************************ Conv 4.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 128, 256});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 128, 256});
  /************************ Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 256, 512});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 256, 512});
}

static void ResNet50(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
  /************************ Conv 2.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 64});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  /************************ Conv 2.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 64});
//b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  /************************ Conv 3.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 128});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 128, 128});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
  b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 256, 512});
  /************************ Conv 3.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 128});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
  /************************ Conv 4.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 256});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 256, 256});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
  b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 512, 1024});
  /************************ Conv 4.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 256});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
  /************************ Conv 5.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 512});
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 512, 512});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
  b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 1024, 2048});
  /************************ Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 2048, 512});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
}

static void VGG(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 3, 64});
  /************************* Conv 1.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 64, 64});

  /************************* Conv 2.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 64, 128});
  /************************* Conv 2.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 128, 128});

  /************************* Conv 3.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 128, 256});
  /************************* Conv 3.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  /************************* Conv 3.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 256});

  /************************* Conv 4.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 256, 512});
  /************************* Conv 4.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  /************************* Conv 4.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 512});

  /************************* Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  /************************* Conv 5.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
}

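// In the three SRCNN variants below, the (a-b-c) suffix gives the square
// kernel sizes of the model's three convolution layers. No padding is used,
// so each layer shrinks the spatial extent by (kernel size - 1), e.g.
// 384 - (9 - 1) = 376 after the first 9x9 layer.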
// SRCNN (9-1-5)
static void SRCNN915(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
  b->Args({1, 376, 376, 1, 1, 0, 0, 1, 1, 1, 64, 32});
  b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 32, 1});
}

// SRCNN (9-3-5)
static void SRCNN935(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
  b->Args({1, 376, 376, 3, 3, 0, 0, 1, 1, 1, 64, 32});
  b->Args({1, 374, 374, 5, 5, 0, 0, 1, 1, 1, 32, 1});
}

// SRCNN (9-5-5)
static void SRCNN955(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
  b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 64, 32});
  b->Args({1, 372, 372, 5, 5, 0, 0, 1, 1, 1, 32, 1});
}

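// Each BENCHMARK_CAPTURE line below registers one benchmark per network:
// Apply() runs the shape generator to enumerate that network's convolutions,
// and UseRealTime() makes the reported metric wall-clock time rather than
// per-process CPU time, which is the meaningful number once the operator
// runs multi-threaded.
//
// A minimal sketch of how a further model could be added, following the same
// pattern (kept commented out; the generator name and shape are illustrative
// only, not part of the suite):
//
//   static void ToyModel(benchmark::internal::Benchmark* b) {
//     b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
//     b->Args({1, 32, 32, 3, 3, 2, 2, 1, 1, 1, 3, 16});
//   }
//   BENCHMARK_CAPTURE(xnnpack_convolution_f32, toy_model, "Toy model")->Apply(ToyModel)->UseRealTime();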
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();

BENCHMARK_CAPTURE(xnnpack_convolution_q8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_q8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();

#ifdef BENCHMARK_TENSORFLOW_LITE
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // BENCHMARK_TENSORFLOW_LITE

#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
  BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // BENCHMARK_ARM_COMPUTE_LIBRARY

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
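
// Example invocation, assuming the benchmark binary is named
// "convolution-bench" (the actual target name depends on the build setup):
//
//   ./convolution-bench --benchmark_filter=xnnpack_convolution_f32/mobilenet_v2
//
// --benchmark_filter is the standard Google Benchmark flag for selecting a
// subset of the registered benchmarks by regular expression.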