blob: 4616f5fb3025f8d35c230c4a0bacdb2c699ca28a [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cfloat>
11#include <cmath>
12#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070013#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <ostream>
15#include <random>
16#include <string>
17#include <vector>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack.h>
20
Frank Barchardbb4c18b2019-09-30 11:05:52 -070021#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
22#include "arm_compute/core/Types.h"
23#include "arm_compute/runtime/Tensor.h"
24#include "arm_compute/runtime/CPP/CPPScheduler.h"
25#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
26#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
27#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
XNNPACK Teamb455b122019-09-27 18:10:33 -070028#include <benchmark/benchmark.h>
Frank Barchard49b4dcc2020-06-26 14:07:19 -070029#include <fp16.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070030#ifdef BENCHMARK_TENSORFLOW_LITE
31#include "flatbuffers/include/flatbuffers/flatbuffers.h"
32#include "tensorflow/lite/interpreter.h"
33#include "tensorflow/lite/kernels/register.h"
34#include "tensorflow/lite/model.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070035#include "tensorflow/lite/schema/schema_generated.h"
36#include "tensorflow/lite/version.h"
37#endif // BENCHMARK_TENSORFLOW_LITE
Frank Barchardbb4c18b2019-09-30 11:05:52 -070038#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070039
Chao Meic6640272020-07-23 09:35:11 -070040#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan08b7a972020-07-14 18:17:29 -070041void xnnpack_convolution_qu8(benchmark::State& state, const char* net) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070042 const size_t batch_size = state.range(0);
43 const size_t input_height = state.range(1);
44 const size_t input_width = state.range(2);
45 const size_t kernel_height = state.range(3);
46 const size_t kernel_width = state.range(4);
47 const size_t padding_height = state.range(5);
48 const size_t padding_width = state.range(6);
49 const size_t subsampling = state.range(7);
50 const size_t dilation = state.range(8);
51 const size_t groups = state.range(9);
52 const size_t group_input_channels = state.range(10);
53 const size_t group_output_channels = state.range(11);
54
55 std::random_device random_device;
56 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070057 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
Marat Dukhan44f0ca72020-08-02 21:46:58 -070058 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070059
60 const size_t output_pixel_stride = groups * group_output_channels;
61 const size_t input_pixel_stride = groups * group_input_channels;
62 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
63 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
64 const size_t padding_left = padding_width / 2;
65 const size_t padding_top = padding_height / 2;
66 const size_t padding_right = padding_width - padding_left;
67 const size_t padding_bottom = padding_height - padding_top;
68 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
69 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
70
71 std::vector<uint8_t> input(batch_size * input_height * input_width * input_pixel_stride);
72 std::generate(input.begin(), input.end(), std::ref(u8rng));
73 std::vector<uint8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
74 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
75 std::vector<int32_t> bias(groups * group_output_channels);
Marat Dukhanecd83112020-08-03 21:50:28 -070076 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070077 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
78
Marat Dukhan04f03be2019-11-19 12:36:47 -080079 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070080 if (status != xnn_status_success) {
81 state.SkipWithError("failed to initialize XNNPACK");
82 return;
83 }
84
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070086 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070087 sizeof(uint8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(uint8_t) * output_elements);
88 std::vector<uint8_t> output(output_elements * num_buffers);
89
90 std::vector<xnn_operator_t> convolution_operators(num_buffers);
91 for (xnn_operator_t& convolution_op : convolution_operators) {
Marat Dukhan08b7a972020-07-14 18:17:29 -070092 status = xnn_create_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 padding_top, padding_right, padding_bottom, padding_left,
94 kernel_height, kernel_width,
95 subsampling, subsampling,
96 dilation, dilation,
97 groups, group_input_channels, group_output_channels,
98 input_pixel_stride, output_pixel_stride,
99 127, 0.5f,
100 127, 0.5f,
101 kernel.data(), bias.data(),
102 127, 0.5f, 0, 255,
103 0 /* flags */, &convolution_op);
104 if (status != xnn_status_success) {
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700105 state.SkipWithError("failed to create QUINT8 Convolution operator");
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106 return;
107 }
108 }
109
110 for (size_t i = 0; i < convolution_operators.size(); i++) {
Marat Dukhan08b7a972020-07-14 18:17:29 -0700111 status = xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700112 convolution_operators[i],
113 batch_size, input_height, input_width,
114 input.data(), output.data() + i * output_elements,
115 nullptr /* thread pool */);
116 if (status != xnn_status_success) {
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700117 state.SkipWithError("failed to setup QUINT8 Convolution operator");
118 return;
119 }
120 }
121
122 size_t buffer_index = 0;
123 for (auto _ : state) {
124 state.PauseTiming();
125 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
126 buffer_index = (buffer_index + 1) % num_buffers;
127 state.ResumeTiming();
128
129 status = xnn_run_operator(convolution_operators[buffer_index],
130 nullptr /* thread pool */);
131 if (status != xnn_status_success) {
132 state.SkipWithError("failed to run QUINT8 Convolution operator");
133 return;
134 }
135 }
136
137 for (xnn_operator_t& convolution_op : convolution_operators) {
138 status = xnn_delete_operator(convolution_op);
139 if (status != xnn_status_success) {
140 state.SkipWithError("failed to delete QUINT8 Convolution operator");
141 return;
142 }
143 convolution_op = nullptr;
144 }
145
Marat Dukhand713e8a2020-12-04 14:23:12 -0800146 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
147 if (cpu_frequency != 0) {
148 state.counters["cpufreq"] = cpu_frequency;
149 }
150
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700151 state.counters["OPS"] = benchmark::Counter(
152 uint64_t(state.iterations()) * 2 *
153 batch_size * output_height * output_width *
154 groups * group_input_channels * group_output_channels *
155 kernel_height * kernel_width,
156 benchmark::Counter::kIsRate);
157}
158#endif // XNN_NO_QU8_OPERATORS
159
160#ifndef XNN_NO_QS8_OPERATORS
161void xnnpack_convolution_qs8(benchmark::State& state, const char* net) {
162 const size_t batch_size = state.range(0);
163 const size_t input_height = state.range(1);
164 const size_t input_width = state.range(2);
165 const size_t kernel_height = state.range(3);
166 const size_t kernel_width = state.range(4);
167 const size_t padding_height = state.range(5);
168 const size_t padding_width = state.range(6);
169 const size_t subsampling = state.range(7);
170 const size_t dilation = state.range(8);
171 const size_t groups = state.range(9);
172 const size_t group_input_channels = state.range(10);
173 const size_t group_output_channels = state.range(11);
174
175 std::random_device random_device;
176 auto rng = std::mt19937(random_device());
177 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
178 auto i8rng = std::bind(
179 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
180
181 const size_t output_pixel_stride = groups * group_output_channels;
182 const size_t input_pixel_stride = groups * group_input_channels;
183 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
184 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
185 const size_t padding_left = padding_width / 2;
186 const size_t padding_top = padding_height / 2;
187 const size_t padding_right = padding_width - padding_left;
188 const size_t padding_bottom = padding_height - padding_top;
189 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
190 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
191
192 std::vector<int8_t> input(batch_size * input_height * input_width * input_pixel_stride);
193 std::generate(input.begin(), input.end(), std::ref(i8rng));
194 std::vector<int8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
195 std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
196 std::vector<int32_t> bias(groups * group_output_channels);
197 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
198 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
199
200 xnn_status status = xnn_initialize(nullptr /* allocator */);
201 if (status != xnn_status_success) {
202 state.SkipWithError("failed to initialize XNNPACK");
203 return;
204 }
205
206 const size_t num_buffers = 1 +
207 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
208 sizeof(int8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(int8_t) * output_elements);
209 std::vector<int8_t> output(output_elements * num_buffers);
210
211 std::vector<xnn_operator_t> convolution_operators(num_buffers);
212 for (xnn_operator_t& convolution_op : convolution_operators) {
213 status = xnn_create_convolution2d_nhwc_qs8(
214 padding_top, padding_right, padding_bottom, padding_left,
215 kernel_height, kernel_width,
216 subsampling, subsampling,
217 dilation, dilation,
218 groups, group_input_channels, group_output_channels,
219 input_pixel_stride, output_pixel_stride,
220 127, 0.5f, 0.5f,
221 kernel.data(), bias.data(),
222 127, 0.5f, -128, 127,
223 0 /* flags */, &convolution_op);
224 if (status != xnn_status_success) {
225 state.SkipWithError("failed to create QINT8 Convolution operator");
226 return;
227 }
228 }
229
230 for (size_t i = 0; i < convolution_operators.size(); i++) {
231 status = xnn_setup_convolution2d_nhwc_qs8(
232 convolution_operators[i],
233 batch_size, input_height, input_width,
234 input.data(), output.data() + i * output_elements,
235 nullptr /* thread pool */);
236 if (status != xnn_status_success) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700237 state.SkipWithError("failed to setup QINT8 Convolution operator");
238 return;
239 }
240 }
241
242 size_t buffer_index = 0;
243 for (auto _ : state) {
244 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700245 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700246 buffer_index = (buffer_index + 1) % num_buffers;
247 state.ResumeTiming();
248
249 status = xnn_run_operator(convolution_operators[buffer_index],
250 nullptr /* thread pool */);
251 if (status != xnn_status_success) {
252 state.SkipWithError("failed to run QINT8 Convolution operator");
253 return;
254 }
255 }
256
257 for (xnn_operator_t& convolution_op : convolution_operators) {
258 status = xnn_delete_operator(convolution_op);
259 if (status != xnn_status_success) {
260 state.SkipWithError("failed to delete QINT8 Convolution operator");
261 return;
262 }
263 convolution_op = nullptr;
264 }
265
Marat Dukhand713e8a2020-12-04 14:23:12 -0800266 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
267 if (cpu_frequency != 0) {
268 state.counters["cpufreq"] = cpu_frequency;
269 }
270
XNNPACK Teamb455b122019-09-27 18:10:33 -0700271 state.counters["OPS"] = benchmark::Counter(
272 uint64_t(state.iterations()) * 2 *
273 batch_size * output_height * output_width *
274 groups * group_input_channels * group_output_channels *
275 kernel_height * kernel_width,
276 benchmark::Counter::kIsRate);
277}
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700278#endif // XNN_NO_QS8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700279
Chao Meic6640272020-07-23 09:35:11 -0700280#ifndef XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700281void xnnpack_convolution_f16(benchmark::State& state, const char* net) {
282 if (!benchmark::utils::CheckNEONFP16ARITH(state)) {
283 return;
284 }
285 const size_t batch_size = state.range(0);
286 const size_t input_height = state.range(1);
287 const size_t input_width = state.range(2);
288 const size_t kernel_height = state.range(3);
289 const size_t kernel_width = state.range(4);
290 const size_t padding_height = state.range(5);
291 const size_t padding_width = state.range(6);
292 const size_t subsampling = state.range(7);
293 const size_t dilation = state.range(8);
294 const size_t groups = state.range(9);
295 const size_t group_input_channels = state.range(10);
296 const size_t group_output_channels = state.range(11);
297
298 std::random_device random_device;
299 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700300 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700301 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
302
303 const size_t output_pixel_stride = groups * group_output_channels;
304 const size_t input_pixel_stride = groups * group_input_channels;
305 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
306 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
307 const size_t padding_left = padding_width / 2;
308 const size_t padding_top = padding_height / 2;
309 const size_t padding_right = padding_width - padding_left;
310 const size_t padding_bottom = padding_height - padding_top;
311 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
312 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
313
314 std::vector<uint16_t> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(uint16_t));
315 std::generate(input.begin(), input.end(), std::ref(f16rng));
316 std::vector<uint16_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
317 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
318 std::vector<uint16_t> bias(groups * group_output_channels);
319 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
320 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
321
322 xnn_status status = xnn_initialize(nullptr /* allocator */);
323 if (status != xnn_status_success) {
324 state.SkipWithError("failed to initialize XNNPACK");
325 return;
326 }
327
328 const size_t num_buffers = 1 +
329 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
330 sizeof(uint16_t) * (kernel.size() + bias.size() + output_elements));
331 std::vector<uint16_t> output(output_elements * num_buffers);
332
333 std::vector<xnn_operator_t> convolution_operators(num_buffers);
334 for (xnn_operator_t& convolution_op : convolution_operators) {
335 status = xnn_create_convolution2d_nhwc_f16(
336 padding_top, padding_right, padding_bottom, padding_left,
337 kernel_height, kernel_width,
338 subsampling, subsampling,
339 dilation, dilation,
340 groups, group_input_channels, group_output_channels,
341 input_pixel_stride, output_pixel_stride,
342 kernel.data(), bias.data(),
343 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
344 0 /* flags */, &convolution_op);
345 if (status != xnn_status_success) {
346 state.SkipWithError("failed to create FP16 Convolution operator");
347 return;
348 }
349 }
350
351 for (size_t i = 0; i < convolution_operators.size(); i++) {
352 status = xnn_setup_convolution2d_nhwc_f16(
353 convolution_operators[i],
354 batch_size, input_height, input_width,
355 input.data(), output.data() + i * output_elements,
356 nullptr /* thread pool */);
357 if (status != xnn_status_success) {
358 state.SkipWithError("failed to setup FP16 Convolution operator");
359 return;
360 }
361 }
362
363 size_t buffer_index = 0;
364 for (auto _ : state) {
365 state.PauseTiming();
366 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint16_t));
367 buffer_index = (buffer_index + 1) % num_buffers;
368 state.ResumeTiming();
369
370 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
371 if (status != xnn_status_success) {
372 state.SkipWithError("failed to run FP16 Convolution operator");
373 return;
374 }
375 }
376
377 for (xnn_operator_t& convolution_op : convolution_operators) {
378 status = xnn_delete_operator(convolution_op);
379 if (status != xnn_status_success) {
380 state.SkipWithError("failed to delete FP16 Convolution operator");
381 return;
382 }
383 convolution_op = nullptr;
384 }
385
Marat Dukhand713e8a2020-12-04 14:23:12 -0800386 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
387 if (cpu_frequency != 0) {
388 state.counters["cpufreq"] = cpu_frequency;
389 }
390
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700391 state.counters["FLOPS"] = benchmark::Counter(
392 uint64_t(state.iterations()) * 2 *
393 batch_size * output_height * output_width *
394 groups * group_input_channels * group_output_channels *
395 kernel_height * kernel_width,
396 benchmark::Counter::kIsRate);
397}
Chao Meic6640272020-07-23 09:35:11 -0700398#endif // XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700399
XNNPACK Teamb455b122019-09-27 18:10:33 -0700400void xnnpack_convolution_f32(benchmark::State& state, const char* net) {
401 const size_t batch_size = state.range(0);
402 const size_t input_height = state.range(1);
403 const size_t input_width = state.range(2);
404 const size_t kernel_height = state.range(3);
405 const size_t kernel_width = state.range(4);
406 const size_t padding_height = state.range(5);
407 const size_t padding_width = state.range(6);
408 const size_t subsampling = state.range(7);
409 const size_t dilation = state.range(8);
410 const size_t groups = state.range(9);
411 const size_t group_input_channels = state.range(10);
412 const size_t group_output_channels = state.range(11);
413
414 std::random_device random_device;
415 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700416 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700417
418 const size_t output_pixel_stride = groups * group_output_channels;
419 const size_t input_pixel_stride = groups * group_input_channels;
420 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
421 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
422 const size_t padding_left = padding_width / 2;
423 const size_t padding_top = padding_height / 2;
424 const size_t padding_right = padding_width - padding_left;
425 const size_t padding_bottom = padding_height - padding_top;
426 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
427 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
428
429 std::vector<float> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
430 std::generate(input.begin(), input.end(), std::ref(f32rng));
431 std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
432 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
433 std::vector<float> bias(groups * group_output_channels);
434 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
435 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
436
Marat Dukhan04f03be2019-11-19 12:36:47 -0800437 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700438 if (status != xnn_status_success) {
439 state.SkipWithError("failed to initialize XNNPACK");
440 return;
441 }
442
XNNPACK Teamb455b122019-09-27 18:10:33 -0700443 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -0700444 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700445 sizeof(float) * (kernel.size() + bias.size() + output_elements));
446 std::vector<float> output(output_elements * num_buffers);
447
448 std::vector<xnn_operator_t> convolution_operators(num_buffers);
449 for (xnn_operator_t& convolution_op : convolution_operators) {
450 status = xnn_create_convolution2d_nhwc_f32(
451 padding_top, padding_right, padding_bottom, padding_left,
452 kernel_height, kernel_width,
453 subsampling, subsampling,
454 dilation, dilation,
455 groups, group_input_channels, group_output_channels,
456 input_pixel_stride, output_pixel_stride,
457 kernel.data(), bias.data(),
458 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
459 0 /* flags */, &convolution_op);
460 if (status != xnn_status_success) {
461 state.SkipWithError("failed to create FP32 Convolution operator");
462 return;
463 }
464 }
465
466 for (size_t i = 0; i < convolution_operators.size(); i++) {
467 status = xnn_setup_convolution2d_nhwc_f32(
468 convolution_operators[i],
469 batch_size, input_height, input_width,
470 input.data(), output.data() + i * output_elements,
471 nullptr /* thread pool */);
472 if (status != xnn_status_success) {
473 state.SkipWithError("failed to setup FP32 Convolution operator");
474 return;
475 }
476 }
477
478 size_t buffer_index = 0;
479 for (auto _ : state) {
480 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700481 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700482 buffer_index = (buffer_index + 1) % num_buffers;
483 state.ResumeTiming();
484
485 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
486 if (status != xnn_status_success) {
487 state.SkipWithError("failed to run FP32 Convolution operator");
488 return;
489 }
490 }
491
492 for (xnn_operator_t& convolution_op : convolution_operators) {
493 status = xnn_delete_operator(convolution_op);
494 if (status != xnn_status_success) {
495 state.SkipWithError("failed to delete FP32 Convolution operator");
496 return;
497 }
498 convolution_op = nullptr;
499 }
500
Marat Dukhand713e8a2020-12-04 14:23:12 -0800501 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
502 if (cpu_frequency != 0) {
503 state.counters["cpufreq"] = cpu_frequency;
504 }
505
XNNPACK Teamb455b122019-09-27 18:10:33 -0700506 state.counters["FLOPS"] = benchmark::Counter(
507 uint64_t(state.iterations()) * 2 *
508 batch_size * output_height * output_width *
509 groups * group_input_channels * group_output_channels *
510 kernel_height * kernel_width,
511 benchmark::Counter::kIsRate);
512}
513
514#ifdef BENCHMARK_TENSORFLOW_LITE
515void tflite_convolution_f32(benchmark::State& state, const char* net) {
516 const size_t batch_size = state.range(0);
517 const size_t input_height = state.range(1);
518 const size_t input_width = state.range(2);
519 const size_t kernel_height = state.range(3);
520 const size_t kernel_width = state.range(4);
521 const size_t padding_height = state.range(5);
522 const size_t padding_width = state.range(6);
523 const size_t subsampling = state.range(7);
524 const size_t dilation = state.range(8);
525 const size_t groups = state.range(9);
526 const size_t group_input_channels = state.range(10);
527 const size_t group_output_channels = state.range(11);
528
529 bool is_depthwise = false;
530 if (groups != 1) {
531 if (group_input_channels == 1) {
532 is_depthwise = true;
533 } else {
534 state.SkipWithError("grouped convolution is not supported");
535 return;
536 }
537 }
538
539 std::random_device random_device;
540 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700541 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700542
543 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
544 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
545
546 tflite::Padding padding = tflite::Padding_VALID;
547 if (padding_width == (effective_kernel_width - 1) && padding_height == (effective_kernel_height - 1)) {
548 padding = tflite::Padding_SAME;
549 } else if (padding_width == 0 && padding_height == 0) {
550 padding = tflite::Padding_VALID;
551 } else {
552 state.SkipWithError("unsupported padding");
553 return;
554 }
555
556 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
557 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
558
559 std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
560 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
561 std::vector<float> bias(groups * group_output_channels);
562 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
563
564 flatbuffers::FlatBufferBuilder builder;
565 flatbuffers::Offset<tflite::OperatorCode> operator_code =
566 CreateOperatorCode(
567 builder,
568 is_depthwise ? tflite::BuiltinOperator_DEPTHWISE_CONV_2D : tflite::BuiltinOperator_CONV_2D,
569 0);
570
571 flatbuffers::Offset<tflite::Conv2DOptions> conv2d_options = CreateConv2DOptions(
572 builder,
573 padding,
574 static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
575 tflite::ActivationFunctionType_NONE,
576 static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));
577
578 flatbuffers::Offset<tflite::DepthwiseConv2DOptions> dwconv2d_options = CreateDepthwiseConv2DOptions(
579 builder,
580 padding,
581 static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
582 static_cast<int32_t>(group_output_channels),
583 tflite::ActivationFunctionType_NONE,
584 static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));
585
586 flatbuffers::Offset<tflite::Buffer> buffers[3] = {
587 tflite::CreateBuffer(builder, builder.CreateVector({})),
588 tflite::CreateBuffer(builder, builder.CreateVector(
589 reinterpret_cast<const uint8_t*>(kernel.data()),
590 sizeof(float) * kernel.size())),
591 tflite::CreateBuffer(builder, builder.CreateVector(
592 reinterpret_cast<const uint8_t*>(bias.data()),
593 sizeof(float) * bias.size())),
594 };
595
596 const int32_t input_shape[4] = {
597 static_cast<int32_t>(batch_size),
598 static_cast<int32_t>(input_height),
599 static_cast<int32_t>(input_width),
600 static_cast<int32_t>(groups * group_input_channels)
601 };
602 const int32_t output_shape[4] = {
603 static_cast<int32_t>(batch_size),
604 static_cast<int32_t>(output_height),
605 static_cast<int32_t>(output_width),
606 static_cast<int32_t>(groups * group_output_channels)
607 };
608 const int32_t filter_shape[4] = {
609 static_cast<int32_t>(group_output_channels),
610 static_cast<int32_t>(kernel_height),
611 static_cast<int32_t>(kernel_width),
612 static_cast<int32_t>(groups * group_input_channels)
613 };
614 const int32_t bias_shape[1] = {
615 static_cast<int32_t>(groups * group_output_channels)
616 };
617
618 flatbuffers::Offset<tflite::Tensor> tensors[4] = {
619 tflite::CreateTensor(builder,
620 builder.CreateVector<int32_t>(input_shape, 4),
621 tflite::TensorType_FLOAT32,
622 0 /* buffer id */,
623 builder.CreateString("input")),
624 tflite::CreateTensor(builder,
625 builder.CreateVector<int32_t>(filter_shape, 4),
626 tflite::TensorType_FLOAT32,
627 1 /* buffer id */,
628 builder.CreateString("filter")),
629 tflite::CreateTensor(builder,
630 builder.CreateVector<int32_t>(bias_shape, 1),
631 tflite::TensorType_FLOAT32,
632 2 /* buffer id */,
633 builder.CreateString("bias")),
634 tflite::CreateTensor(builder,
635 builder.CreateVector<int32_t>(output_shape, 4),
636 tflite::TensorType_FLOAT32,
637 0 /* buffer id */,
638 builder.CreateString("output")),
639 };
640
641 const int32_t op_inputs[3] = { 0, 1, 2 };
642 const int32_t op_outputs[1] = { 3 };
643 flatbuffers::Offset<tflite::Operator> op = CreateOperator(
644 builder,
645 0 /* opcode_index */,
646 builder.CreateVector<int32_t>(op_inputs, 3),
647 builder.CreateVector<int32_t>(op_outputs, 1),
648 is_depthwise ? tflite::BuiltinOptions_DepthwiseConv2DOptions : tflite::BuiltinOptions_Conv2DOptions,
649 is_depthwise ? dwconv2d_options.Union() : conv2d_options.Union(),
650 /*custom_options */ 0,
651 tflite::CustomOptionsFormat_FLEXBUFFERS);
652
653 const int32_t graph_inputs[1] = { 0 };
654 const int32_t graph_outputs[1] = { 3 };
655 flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
656 builder,
657 builder.CreateVector(tensors, 4),
658 builder.CreateVector<int32_t>(graph_inputs, 1),
659 builder.CreateVector<int32_t>(graph_outputs, 1),
660 builder.CreateVector(&op, 1),
661 builder.CreateString("Conv2D subgraph"));
662
663 flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("Conv2D model");
664
665 flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
666 TFLITE_SCHEMA_VERSION,
667 builder.CreateVector(&operator_code, 1),
668 builder.CreateVector(&subgraph, 1),
669 description,
670 builder.CreateVector(buffers, 3));
671
672 builder.Finish(model_buffer);
673
674 const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
675 tflite::ops::builtin::BuiltinOpResolver resolver;
676 tflite::InterpreterBuilder interpreterBuilder(model, resolver);
677 std::unique_ptr<tflite::Interpreter> interpreter;
678 if (interpreterBuilder(&interpreter) != kTfLiteOk) {
679 state.SkipWithError("failed to create TFLite interpreter");
680 return;
681 }
682 if (interpreter == nullptr) {
683 state.SkipWithError("TFLite interpreter is null");
684 return;
685 }
686 interpreter->SetNumThreads(1);
687
688 if (interpreter->AllocateTensors() != kTfLiteOk) {
689 state.SkipWithError("failed to allocate tensors");
690 return;
691 }
692
693 std::generate(
694 interpreter->typed_tensor<float>(0),
695 interpreter->typed_tensor<float>(0) + batch_size * groups * group_input_channels * input_height * input_width,
696 std::ref(f32rng));
697
698 for (auto _ : state) {
699 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700700 benchmark::utils::WipeCache();
701 benchmark::utils::PrefetchToL1(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700702 interpreter->typed_tensor<float>(0),
703 batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
704 state.ResumeTiming();
705
706 if (interpreter->Invoke() != kTfLiteOk) {
707 state.SkipWithError("failed to invoke TFLite interpreter");
708 return;
709 }
710 }
711
Marat Dukhand713e8a2020-12-04 14:23:12 -0800712 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
713 if (cpu_frequency != 0) {
714 state.counters["cpufreq"] = cpu_frequency;
715 }
716
XNNPACK Teamb455b122019-09-27 18:10:33 -0700717 state.counters["FLOPS"] = benchmark::Counter(
718 uint64_t(state.iterations()) * 2 *
719 batch_size * output_height * output_width *
720 groups * group_input_channels * group_output_channels *
721 kernel_height * kernel_width,
722 benchmark::Counter::kIsRate);
723
724 interpreter.reset();
725}
726#endif // BENCHMARK_TENSORFLOW_LITE
727
728#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
729static std::string compare_with_convolution_f32_reference_output(
730 const benchmark::State& state, const float* input, size_t input_size,
731 const float* kernel, size_t kernel_size, const float* bias, size_t bias_size,
732 const float* output, size_t output_size)
733{
734 const size_t batch_size = state.range(0);
735 const size_t input_height = state.range(1);
736 const size_t input_width = state.range(2);
737 const size_t kernel_height = state.range(3);
738 const size_t kernel_width = state.range(4);
739 const size_t padding_height = state.range(5);
740 const size_t padding_width = state.range(6);
741 const size_t subsampling = state.range(7);
742 const size_t dilation = state.range(8);
743 const size_t groups = state.range(9);
744 const size_t group_input_channels = state.range(10);
745 const size_t group_output_channels = state.range(11);
746
747 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
748 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
749 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
750 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
751 const size_t input_pixel_stride = groups * group_input_channels;
752 const size_t padding_left = padding_width / 2;
753 const size_t padding_top = padding_height / 2;
754
755 assert(input_size == batch_size * input_height * input_width * groups * group_input_channels);
756
757 assert(kernel_size == group_output_channels * kernel_height * kernel_width * groups * group_input_channels);
758
759 assert(bias_size == groups * group_output_channels);
760
761 assert(output_size == batch_size * output_height * output_width * groups * group_output_channels);
762
763 std::vector<float> output_ref(output_size);
764 for (size_t i = 0; i < batch_size; i++) {
765 for (size_t oy = 0; oy < output_height; oy++) {
766 for (size_t ox = 0; ox < output_width; ox++) {
767 for (size_t g = 0; g < groups; g++) {
768 for (size_t oc = 0; oc < group_output_channels; oc++) {
769 output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] =
770 bias[g * group_output_channels + oc];
771 }
772 }
773 }
774 }
775 }
776 for (size_t i = 0; i < batch_size; i++) {
777 for (size_t oy = 0; oy < output_height; oy++) {
778 for (size_t ox = 0; ox < output_width; ox++) {
779 for (size_t ky = 0; ky < kernel_height; ky++) {
780 const size_t iy = oy * subsampling + ky * dilation - padding_top;
781 if (iy < input_height) {
782 for (size_t kx = 0; kx < kernel_width; kx++) {
783 const size_t ix = ox * subsampling + kx * dilation - padding_left;
784 if (ix < input_width) {
785 for (size_t g = 0; g < groups; g++) {
786 for (size_t oc = 0; oc < group_output_channels; oc++) {
787 for (size_t ic = 0; ic < group_input_channels; ic++) {
788 output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
789 input[((i * input_height + iy) * input_width + ix) * input_pixel_stride + g * group_input_channels + ic] *
790 kernel[(((oc * kernel_height + ky) * kernel_width + kx) * groups + g) * group_input_channels + ic];
791 } // group_input_channels loop
792 } // group_output_channels loop
793 } // groups loop
794 }
795 } // kernel_width loop
796 }
797 } // kernel_height loop
798 } // output_width loop
799 } // output_height loop
800 } // batch_size loop
801
802 const float relative_error_tolerance = 1e-4;
803 for (size_t i = 0; i < batch_size; i++) {
804 for (size_t y = 0; y < output_height; y++) {
805 for (size_t x = 0; x < output_width; x++) {
806 for (size_t g = 0; g < groups; g++) {
807 for (size_t c = 0; c < group_output_channels; c++) {
808 const size_t idx = (((i * output_height + y) * output_width + x) * groups + g) * group_output_channels + c;
809 const float value_ref = output_ref[idx];
810 const float value = output[idx];
811 if (std::abs(value - value_ref) > std::max(std::abs(value_ref) * relative_error_tolerance, std::numeric_limits<float>::epsilon())) {
812 std::ostringstream error_stream;
813 error_stream << "(x, y) = (" << x << ", " << y << "), group = " << g
814 << ", channel = " << c << ", refValue = " << value_ref
815 << ", actualValue = " << value
816 << ", absDiff=" << std::abs(value - value_ref);
817 return error_stream.str();
818 }
819 }
820 }
821 }
822 }
823 }
824 return "";
825}
826
827void armcl_convolution_f32(benchmark::State& state, const char* net) {
828 const size_t batch_size = state.range(0);
829 const size_t input_height = state.range(1);
830 const size_t input_width = state.range(2);
831 const size_t kernel_height = state.range(3);
832 const size_t kernel_width = state.range(4);
833 const size_t padding_height = state.range(5);
834 const size_t padding_width = state.range(6);
835 const size_t subsampling = state.range(7);
836 const size_t dilation = state.range(8);
837 const size_t groups = state.range(9);
838 const size_t group_input_channels = state.range(10);
839 const size_t group_output_channels = state.range(11);
840
841 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
842 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
843 const size_t padding_left = padding_width / 2;
844 const size_t padding_top = padding_height / 2;
845 const size_t padding_right = padding_width - padding_left;
846 const size_t padding_bottom = padding_height - padding_top;
847 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
848 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
849
850 arm_compute::PadStrideInfo pad_stride_info(
851 subsampling /* stride height */,
852 subsampling /* stride width */,
853 padding_left, padding_right, padding_top, padding_bottom,
854 arm_compute::DimensionRoundingType::FLOOR);
855 arm_compute::Size2D dilation_info(dilation, dilation);
856 // Note: activation is disabled by default.
857 arm_compute::ActivationLayerInfo activation_info;
858
859 // Note: no batch size and reverse order of dimensions, i.e. CWHN for NHWC.
860 arm_compute::TensorShape input_shape(
861 /* C */ groups * group_input_channels,
862 /* W */ input_width,
863 /* H */ input_height,
864 /* N */ batch_size);
865 arm_compute::TensorInfo input_info(
866 input_shape,
867 1 /* number of channels per element (!) */,
868 arm_compute::DataType::F32);
869 input_info.set_data_layout(arm_compute::DataLayout::NHWC);
870 arm_compute::Tensor input_tensor;
871 input_tensor.allocator()->init(input_info);
872 input_tensor.allocator()->allocate();
873
874 // Note: reverse order of dimensions, i.e. for IWHO for OHWI.
875 arm_compute::TensorShape kernel_shape(
876 /* I */ groups * group_input_channels,
877 /* W */ kernel_width,
878 /* H */ kernel_height,
879 /* O */ group_output_channels);
880 arm_compute::TensorInfo kernel_info(
881 kernel_shape,
882 1 /* number of channels per element (!) */,
883 arm_compute::DataType::F32);
884 kernel_info.set_data_layout(arm_compute::DataLayout::NHWC);
885 arm_compute::Tensor kernelTensor;
886 kernelTensor.allocator()->init(kernel_info);
887 kernelTensor.allocator()->allocate();
888
889 arm_compute::TensorShape bias_shape(groups * group_output_channels);
890 arm_compute::TensorInfo bias_info(
891 bias_shape,
892 1 /* number of channels per element (!) */,
893 arm_compute::DataType::F32);
894 bias_info.set_data_layout(arm_compute::DataLayout::NHWC);
895 arm_compute::Tensor bias_tensor;
896 bias_tensor.allocator()->init(bias_info);
897 bias_tensor.allocator()->allocate();
898
899 // Note: no batch size and reverse order of dimensions, i.e. CWHN for NHWC.
900 arm_compute::TensorShape output_shape(
901 /* C */ groups * group_output_channels,
902 /* W */ output_width,
903 /* H */ output_height,
904 /* N */ batch_size);
905 arm_compute::TensorInfo output_info(
906 output_shape,
907 1 /* number of channels per element (!) */,
908 arm_compute::DataType::F32);
909 output_info.set_data_layout(arm_compute::DataLayout::NHWC);
910 arm_compute::Tensor output_tensor;
911 output_tensor.allocator()->init(output_info);
912 output_tensor.allocator()->allocate();
913
914 std::random_device random_device;
915 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700916 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700917
918 std::generate(
919 reinterpret_cast<float*>(input_tensor.buffer()),
920 reinterpret_cast<float*>(input_tensor.buffer()) + input_shape.total_size(),
921 std::ref(f32rng));
922 std::generate(
923 reinterpret_cast<float*>(kernelTensor.buffer()),
924 reinterpret_cast<float*>(kernelTensor.buffer()) + kernel_shape.total_size(),
925 std::ref(f32rng));
926 std::generate(
927 reinterpret_cast<float*>(bias_tensor.buffer()),
928 reinterpret_cast<float*>(bias_tensor.buffer()) + bias_shape.total_size(),
929 std::ref(f32rng));
930 std::generate(
931 reinterpret_cast<float*>(output_tensor.buffer()),
932 reinterpret_cast<float*>(output_tensor.buffer()) + output_shape.total_size(),
933 std::ref(f32rng));
934
935 bool is_depthwise = false;
936 if (groups != 1) {
937 // NEConvolutionLayer uses NEGEMMConvolutionLayer by default, which doesn't support grouped convolution.
938 // However, depthwise convolution is supported via NEDepthwiseConvolutionLayer.
939 if (group_input_channels == 1) {
940 is_depthwise = true;
941 } else {
942 state.SkipWithError("grouped convolution is not supported");
943 return;
944 }
945 }
946
947 std::shared_ptr<arm_compute::IFunction> layer;
948 if (is_depthwise) {
949 if (dilation != 1) {
950 state.SkipWithError("dilated depthwise convolution is not supported");
951 return;
952 }
953
954 // Avoid NEDepthwiseConvolutionLayer3x3 when stride isn't 2 in order to pass the output verification.
955 // TODO(b/130206370) This looks like a bug and needs further investigation.
956 if (kernel_height == 3 && kernel_width == 3 && subsampling == 2) {
957 auto* depthwise_3x3_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer3x3();
958 layer.reset(depthwise_3x3_convolution_layer);
959 depthwise_3x3_convolution_layer->configure(
960 &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
961 pad_stride_info, group_output_channels, activation_info);
962
963 if (!depthwise_3x3_convolution_layer->validate(
964 &input_info, &kernel_info, &bias_info, &output_info,
965 pad_stride_info, group_output_channels, activation_info))
966 {
967 state.SkipWithError("validation failed");
968 return;
969 }
970 } else {
971 auto* depthwise_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer();
972 layer.reset(depthwise_convolution_layer);
973 depthwise_convolution_layer->configure(
974 &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
975 pad_stride_info, group_output_channels, activation_info);
976
977 if (!depthwise_convolution_layer->validate(
978 &input_info, &kernel_info, &bias_info, &output_info,
979 pad_stride_info, group_output_channels, activation_info))
980 {
981 state.SkipWithError("validation failed");
982 return;
983 }
984 }
985 } else {
986 auto* convolution_layer = new arm_compute::NEConvolutionLayer();
987 layer.reset(convolution_layer);
988 convolution_layer->configure(
989 &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
990 pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
991 true /* enable fast math */, groups);
992
993 if (!convolution_layer->validate(
994 &input_info, &kernel_info, &bias_info, &output_info,
995 pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
996 true /* enable fast math */, groups))
997 {
998 state.SkipWithError("validation failed");
999 return;
1000 }
1001 }
1002
1003 // Dry run to let ACL do one-time initializations.
1004 arm_compute::CPPScheduler::get().set_num_threads(1);
1005 layer->run();
1006
1007 for (auto _ : state) {
1008 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -07001009 benchmark::utils::WipeCache();
1010 benchmark::utils::PrefetchToL1(
XNNPACK Teamb455b122019-09-27 18:10:33 -07001011 input_tensor.buffer(),
1012 batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
1013 state.ResumeTiming();
1014
1015 layer->run();
1016 }
1017
1018 // Validate outputs.
1019 const std::string error_string = compare_with_convolution_f32_reference_output(
1020 state, reinterpret_cast<const float*>(input_tensor.buffer()),
1021 input_shape.total_size(),
1022 reinterpret_cast<const float*>(kernelTensor.buffer()),
1023 kernel_shape.total_size(),
1024 reinterpret_cast<const float*>(bias_tensor.buffer()),
1025 bias_shape.total_size(),
1026 reinterpret_cast<const float*>(output_tensor.buffer()),
1027 output_shape.total_size());
1028
1029 if (!error_string.empty()) {
1030 state.SkipWithError(("validation failed: " + error_string).c_str());
1031 return;
1032 }
1033
1034 input_tensor.allocator()->free();
1035 kernelTensor.allocator()->free();
1036 bias_tensor.allocator()->free();
1037 output_tensor.allocator()->free();
1038
Marat Dukhand713e8a2020-12-04 14:23:12 -08001039 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
1040 if (cpu_frequency != 0) {
1041 state.counters["cpufreq"] = cpu_frequency;
1042 }
1043
XNNPACK Teamb455b122019-09-27 18:10:33 -07001044 state.counters["FLOPS"] = benchmark::Counter(
1045 uint64_t(state.iterations()) * 2 *
1046 batch_size * output_height * output_width *
1047 groups * group_input_channels * group_output_channels *
1048 kernel_height * kernel_width,
1049 benchmark::Counter::kIsRate);
1050}
1051#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
1052
1053// ShuffleNet v1 with 1 group.
1054static void ShuffleNetV1G1(benchmark::internal::Benchmark* b) {
1055 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1056
1057 /*************************** Conv 1 **************************/
1058 /* N H W KH KW PH PW S D G GCin GCout */
1059 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1060 /******************* Stage 2: stride-2 unit ******************/
1061 /* N H W KH KW PH PW S D G GCin GCout */
1062 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 36});
1063 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 36, 1, 1});
1064 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 120});
1065 /******************* Stage 2: stride-1 units *****************/
1066 /* N H W KH KW PH PW S D G GCin GCout */
1067 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 36});
1068 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 36, 1, 1});
1069 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 144});
1070 /******************* Stage 3: stride-2 unit ******************/
1071 /* N H W KH KW PH PW S D G GCin GCout */
1072 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 72});
1073 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 72, 1, 1});
1074 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 144});
1075 /******************* Stage 3: stride-1 units *****************/
1076 /* N H W KH KW PH PW S D G GCin GCout */
1077 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 72});
1078 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 72, 1, 1});
1079 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 288});
1080 /******************* Stage 4: stride-2 unit ******************/
1081 /* N H W KH KW PH PW S D G GCin GCout */
1082 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 144});
1083 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1084 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 288});
1085 /******************* Stage 4: stride-1 units *****************/
1086 /* N H W KH KW PH PW S D G GCin GCout */
1087 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 144});
1088 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1089 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 576});
1090}
1091
1092// ShuffleNet v1 with 2 groups.
1093static void ShuffleNetV1G2(benchmark::internal::Benchmark* b) {
1094 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1095
1096 /*************************** Conv 1 **************************/
1097 /* N H W KH KW PH PW S D G GCin GCout */
1098 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1099 /******************* Stage 2: stride-2 unit ******************/
1100 /* N H W KH KW PH PW S D G GCin GCout */
1101 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 50});
1102 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 50, 1, 1});
1103 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 88});
1104 /******************* Stage 2: stride-1 units *****************/
1105 /* N H W KH KW PH PW S D G GCin GCout */
1106 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 25});
1107 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 50, 1, 1});
1108 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 100});
1109 /******************* Stage 3: stride-2 unit ******************/
1110 /* N H W KH KW PH PW S D G GCin GCout */
1111 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 50});
1112 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 100, 1, 1});
1113 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 100});
1114 /******************* Stage 3: stride-1 units *****************/
1115 /* N H W KH KW PH PW S D G GCin GCout */
1116 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 50});
1117 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 100, 1, 1});
1118 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 200});
1119 /******************* Stage 4: stride-2 unit ******************/
1120 /* N H W KH KW PH PW S D G GCin GCout */
1121 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 100});
1122 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 200, 1, 1});
1123 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 200});
1124 /******************* Stage 4: stride-1 units *****************/
1125 /* N H W KH KW PH PW S D G GCin GCout */
1126 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 400, 100});
1127 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 200, 1, 1});
1128 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 400});
1129}
1130
1131// ShuffleNet v1 with 3 groups.
1132static void ShuffleNetV1G3(benchmark::internal::Benchmark* b) {
1133 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1134
1135 /*************************** Conv 1 **************************/
1136 /* N H W KH KW PH PW S D G GCin GCout */
1137 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1138 /******************* Stage 2: stride-2 unit ******************/
1139 /* N H W KH KW PH PW S D G GCin GCout */
1140 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 60});
1141 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 60, 1, 1});
1142 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 72});
1143 /******************* Stage 2: stride-1 units *****************/
1144 /* N H W KH KW PH PW S D G GCin GCout */
1145 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 20});
1146 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 60, 1, 1});
1147 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 80});
1148 /******************* Stage 3: stride-2 unit ******************/
1149 /* N H W KH KW PH PW S D G GCin GCout */
1150 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 40});
1151 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 120, 1, 1});
1152 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 80});
1153 /******************* Stage 3: stride-1 units *****************/
1154 /* N H W KH KW PH PW S D G GCin GCout */
1155 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 40});
1156 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 120, 1, 1});
1157 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 160});
1158 /******************* Stage 4: stride-2 unit ******************/
1159 /* N H W KH KW PH PW S D G GCin GCout */
1160 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 80});
1161 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1162 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 160});
1163 /******************* Stage 4: stride-1 units *****************/
1164 /* N H W KH KW PH PW S D G GCin GCout */
1165 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 320, 80});
1166 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1167 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 320});
1168}
1169
1170// ShuffleNet v1 with 4 groups.
1171static void ShuffleNetV1G4(benchmark::internal::Benchmark* b) {
1172 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1173
1174 /*************************** Conv 1 **************************/
1175 /* N H W KH KW PH PW S D G GCin GCout */
1176 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1177 /******************* Stage 2: stride-2 unit ******************/
1178 /* N H W KH KW PH PW S D G GCin GCout */
1179 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 68});
1180 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 68, 1, 1});
1181 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 62});
1182 /******************* Stage 2: stride-1 units *****************/
1183 /* N H W KH KW PH PW S D G GCin GCout */
1184 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 17});
1185 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 68, 1, 1});
1186 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 68});
1187 /******************* Stage 3: stride-2 unit ******************/
1188 /* N H W KH KW PH PW S D G GCin GCout */
1189 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 34});
1190 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 136, 1, 1});
1191 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 68});
1192 /******************* Stage 3: stride-1 units *****************/
1193 /* N H W KH KW PH PW S D G GCin GCout */
1194 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 34});
1195 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 136, 1, 1});
1196 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 136});
1197 /******************* Stage 4: stride-2 unit ******************/
1198 /* N H W KH KW PH PW S D G GCin GCout */
1199 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 68});
1200 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 272, 1, 1});
1201 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 136});
1202 /******************* Stage 4: stride-1 units *****************/
1203 /* N H W KH KW PH PW S D G GCin GCout */
1204 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 272, 68});
1205 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 272, 1, 1});
1206 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 272});
1207}
1208
1209// ShuffleNet v1 with 8 groups.
1210static void ShuffleNetV1G8(benchmark::internal::Benchmark* b) {
1211 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1212
1213 /*************************** Conv 1 **************************/
1214 /* N H W KH KW PH PW S D G GCin GCout */
1215 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1216 /******************* Stage 2: stride-2 unit ******************/
1217 /* N H W KH KW PH PW S D G GCin GCout */
1218 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 96});
1219 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1220 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 45});
1221 /******************* Stage 2: stride-1 units *****************/
1222 /* N H W KH KW PH PW S D G GCin GCout */
1223 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 12});
1224 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1225 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 48});
1226 /******************* Stage 3: stride-2 unit ******************/
1227 /* N H W KH KW PH PW S D G GCin GCout */
1228 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 24});
1229 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1230 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 48});
1231 /******************* Stage 3: stride-1 units *****************/
1232 /* N H W KH KW PH PW S D G GCin GCout */
1233 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 24});
1234 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1235 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 96});
1236 /******************* Stage 4: stride-2 unit ******************/
1237 /* N H W KH KW PH PW S D G GCin GCout */
1238 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 48});
1239 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 384, 1, 1});
1240 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 96});
1241 /******************* Stage 4: stride-1 units *****************/
1242 /* N H W KH KW PH PW S D G GCin GCout */
1243 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 192, 48});
1244 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 384, 1, 1});
1245 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 192});
1246}
1247
1248// ShuffleNet v2 (0.5X scale)
1249static void ShuffleNetV2X05(benchmark::internal::Benchmark* b) {
1250 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1251
1252 /*************************** Conv 1 **************************/
1253 /* N H W KH KW PH PW S D G GCin GCout */
1254 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1255 /************************** Stage 2 **************************/
1256 /* N H W KH KW PH PW S D G GCin GCout */
1257 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1258 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1259 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1260 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 24, 1, 1});
1261 /************************** Stage 3 **************************/
1262 /* N H W KH KW PH PW S D G GCin GCout */
1263 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 48, 1, 1});
1264 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1265 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1266 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 48, 1, 1});
1267 /************************** Stage 4 **************************/
1268 /* N H W KH KW PH PW S D G GCin GCout */
1269 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1270 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1271 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1272 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 96, 1, 1});
1273 /*************************** Conv 5 **************************/
1274 /* N H W KH KW PH PW S D G GCin GCout */
1275 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 192, 1024});
1276}
1277
1278// ShuffleNet v2 (1.0X scale)
1279static void ShuffleNetV2X10(benchmark::internal::Benchmark* b) {
1280 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1281
1282 /*************************** Conv 1 **************************/
1283 /* N H W KH KW PH PW S D G GCin GCout */
1284 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1285 /************************** Stage 2 **************************/
1286 /* N H W KH KW PH PW S D G GCin GCout */
1287 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1288 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1289 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1290 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 58, 1, 1});
1291 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 58, 58});
1292 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 58, 1, 1});
1293 /************************** Stage 3 **************************/
1294 /* N H W KH KW PH PW S D G GCin GCout */
1295 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 116, 1, 1});
1296 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1297 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1298 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 116, 1, 1});
1299 /************************** Stage 4 **************************/
1300 /* N H W KH KW PH PW S D G GCin GCout */
1301 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 232, 1, 1});
1302 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1303 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1304 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 232, 1, 1});
1305 /*************************** Conv 5 **************************/
1306 /* N H W KH KW PH PW S D G GCin GCout */
1307 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 464, 1024});
1308}
1309
1310// ShuffleNet v2 (1.5X scale)
1311static void ShuffleNetV2X15(benchmark::internal::Benchmark* b) {
1312 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1313
1314 /*************************** Conv 1 **************************/
1315 /* N H W KH KW PH PW S D G GCin GCout */
1316 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1317 /************************** Stage 2 **************************/
1318 /* N H W KH KW PH PW S D G GCin GCout */
1319 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1320 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1321 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1322 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 88, 1, 1});
1323 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 88});
1324 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
1325 /************************** Stage 3 **************************/
1326 /* N H W KH KW PH PW S D G GCin GCout */
1327 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 176, 1, 1});
1328 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1329 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1330 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 176, 1, 1});
1331 /************************** Stage 4 **************************/
1332 /* N H W KH KW PH PW S D G GCin GCout */
1333 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 352, 1, 1});
1334 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1335 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1336 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 352, 1, 1});
1337 /*************************** Conv 5 **************************/
1338 /* N H W KH KW PH PW S D G GCin GCout */
1339 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 704, 1024});
1340}
1341
1342// ShuffleNet v2 (2.0X scale)
1343static void ShuffleNetV2X20(benchmark::internal::Benchmark* b) {
1344 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1345
1346 /*************************** Conv 1 **************************/
1347 /* N H W KH KW PH PW S D G GCin GCout */
1348 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1349 /************************** Stage 2 **************************/
1350 /* N H W KH KW PH PW S D G GCin GCout */
1351 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1352 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1353 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1354 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 122, 1, 1});
1355 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 122, 122});
1356 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 122, 1, 1});
1357 /************************** Stage 3 **************************/
1358 /* N H W KH KW PH PW S D G GCin GCout */
1359 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 244, 1, 1});
1360 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1361 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1362 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 244, 1, 1});
1363 /************************** Stage 4 **************************/
1364 /* N H W KH KW PH PW S D G GCin GCout */
1365 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 488, 1, 1});
1366 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1367 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1368 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 488, 1, 1});
1369 /*************************** Conv 5 **************************/
1370 /* N H W KH KW PH PW S D G GCin GCout */
1371 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 976, 2048});
1372}
1373
1374static void MobileNetV1(benchmark::internal::Benchmark* b) {
1375 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1376
1377 /* N H W KH KW PH PW S D G GCin GCout */
1378 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1379 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1380 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 64});
1381 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
1382 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 128});
1383 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 128, 1, 1});
1384 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 128, 128});
1385 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 128, 1, 1});
1386 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 256});
1387 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 256, 1, 1});
1388 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1389 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 256, 1, 1});
1390 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 512});
1391 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 512, 1, 1});
1392 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1393 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 512, 1, 1});
1394 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 1024});
1395 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1024, 1, 1});
1396 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 1024, 1024});
1397}
1398
1399static void MobileNetV2(benchmark::internal::Benchmark* b) {
1400 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1401
1402 /* N H W KH KW PH PW S D G GCin GCout */
1403 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1404
1405 /************************ Bottleneck 1 ***********************/
1406 /* N H W KH KW PH PW S D G GCin GCout */
1407 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1408 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 16});
1409
1410 /************************ Bottleneck 2 ***********************/
1411 /* N H W KH KW PH PW S D G GCin GCout */
1412 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 96});
1413 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1414 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 96, 24});
1415 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1416 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 144, 1, 1});
1417 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 144, 24});
1418
1419 /************************ Bottleneck 3 ***********************/
1420 /* N H W KH KW PH PW S D G GCin GCout */
1421//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1422 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1423 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 32});
1424 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1425 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1426 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1427//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1428//b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1429//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1430
1431 /************************ Bottleneck 4 ***********************/
1432 /* N H W KH KW PH PW S D G GCin GCout */
1433//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1434 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1435 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 192, 64});
1436 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1437 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1438 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1439//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1440//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1441//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1442//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1443//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1444//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1445
1446 /************************ Bottleneck 5 ***********************/
1447 /* N H W KH KW PH PW S D G GCin GCout */
1448//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1449//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1450 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 96});
1451 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1452 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1453 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1454//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1455//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1456//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1457
1458 /************************ Bottleneck 6 ***********************/
1459 /* N H W KH KW PH PW S D G GCin GCout */
1460//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1461 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 576, 1, 1});
1462 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 160});
1463 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1464 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1465 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1466//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1467//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1468//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1469
1470 /************************ Bottleneck 7 ***********************/
1471 /* N H W KH KW PH PW S D G GCin GCout */
1472//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1473//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1474 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 320});
1475
1476 /******************** Pre-pooling Conv2D *********************/
1477 /* N H W KH KW PH PW S D G GCin GCout */
1478 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 320, 1280});
1479 /******************** Post-pooling Conv2D ********************/
1480 /* N H W KH KW PH PW S D G GCin GCout */
1481 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1000});
1482}
1483
1484static void MobileNetV3Small(benchmark::internal::Benchmark* b) {
1485 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1486
1487 /*********************** Initial Stage ***********************/
1488 /* N H W KH KW PH PW S D G GCin GCout */
1489 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
1490 /*********************** Bottleneck 1 ************************/
1491 /* N H W KH KW PH PW S D G GCin GCout */
1492 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 16, 1, 1});
1493 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 16, 8});
1494 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 8, 16});
1495 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 16});
1496 /*********************** Bottleneck 2 ************************/
1497 /* N H W KH KW PH PW S D G GCin GCout */
1498 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 72});
1499 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 72, 1, 1});
1500 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 24});
1501 /*********************** Bottleneck 3 ************************/
1502 /* N H W KH KW PH PW S D G GCin GCout */
1503 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1504 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
1505 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 24});
1506 /*********************** Bottleneck 4 ************************/
1507 /* N H W KH KW PH PW S D G GCin GCout */
1508 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 96});
1509 b->Args({1, 28, 28, 5, 5, 4, 4, 2, 1, 96, 1, 1});
1510 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 96, 24});
1511 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 96});
1512 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 40});
1513 /*********************** Bottleneck 5 ************************/
1514 /* N H W KH KW PH PW S D G GCin GCout */
1515 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
1516 b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
1517 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
1518 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
1519 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
1520 /*********************** Bottleneck 6 ************************/
1521 /* N H W KH KW PH PW S D G GCin GCout */
1522//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
1523//b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
1524//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
1525//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
1526//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
1527 /*********************** Bottleneck 7 ************************/
1528 /* N H W KH KW PH PW S D G GCin GCout */
1529 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 120});
1530 b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 120, 1, 1});
1531 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
1532 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
1533 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 120, 48});
1534 /*********************** Bottleneck 8 ************************/
1535 /* N H W KH KW PH PW S D G GCin GCout */
1536 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 144});
1537 b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 144, 1, 1});
1538 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 40});
1539 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 40, 144});
1540 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 144, 48});
1541 /*********************** Bottleneck 9 ************************/
1542 /* N H W KH KW PH PW S D G GCin GCout */
1543 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 288});
1544 b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 288, 1, 1});
1545 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 288, 72});
1546 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 288});
1547 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 288, 96});
1548 /*********************** Bottleneck 10 ***********************/
1549 /* N H W KH KW PH PW S D G GCin GCout */
1550 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1551 b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
1552 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
1553 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
1554 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1555 /*********************** Bottleneck 11 ***********************/
1556 /* N H W KH KW PH PW S D G GCin GCout */
1557//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1558//b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
1559//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
1560//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
1561//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1562 /************************ Last Stage ************************/
1563 /* N H W KH KW PH PW S D G GCin GCout */
1564//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1565 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 1024});
1566 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1024, 1001});
1567}
1568
1569static void MobileNetV3Large(benchmark::internal::Benchmark* b) {
1570 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1571
1572 /*********************** Initial Stage ***********************/
1573 /* N H W KH KW PH PW S D G GCin GCout */
1574 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
1575 /*********************** Bottleneck 1 ************************/
1576 /* N H W KH KW PH PW S D G GCin GCout */
1577 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 16, 1, 1});
1578 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 16});
1579 /*********************** Bottleneck 2 ************************/
1580 /* N H W KH KW PH PW S D G GCin GCout */
1581 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1582 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
1583 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 24});
1584 /*********************** Bottleneck 3 ************************/
1585 /* N H W KH KW PH PW S D G GCin GCout */
1586 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
1587 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 72, 1, 1});
1588 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 72, 24});
1589 /*********************** Bottleneck 4 ************************/
1590 /* N H W KH KW PH PW S D G GCin GCout */
1591//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
1592 b->Args({1, 56, 56, 5, 5, 4, 4, 2, 1, 72, 1, 1});
1593 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 24});
1594 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 72});
1595 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 40});
1596 /*********************** Bottleneck 5 ************************/
1597 /* N H W KH KW PH PW S D G GCin GCout */
1598 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
1599 b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
1600 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
1601 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
1602 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
1603 /*********************** Bottleneck 6 ************************/
1604 /* N H W KH KW PH PW S D G GCin GCout */
1605//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
1606//b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
1607//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
1608//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
1609//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
1610 /*********************** Bottleneck 7 ************************/
1611 /* N H W KH KW PH PW S D G GCin GCout */
1612 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 240});
1613 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1614 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 80});
1615 /*********************** Bottleneck 8 ************************/
1616 /* N H W KH KW PH PW S D G GCin GCout */
1617 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 200});
1618 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 200, 1, 1});
1619 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 200, 80});
1620 /*********************** Bottleneck 9 ************************/
1621 /* N H W KH KW PH PW S D G GCin GCout */
1622 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
1623 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
1624 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
1625 /********************** Bottleneck 10 ***********************/
1626 /* N H W KH KW PH PW S D G GCin GCout */
1627//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
1628//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
1629//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
1630 /********************** Bottleneck 11 ***********************/
1631 /* N H W KH KW PH PW S D G GCin GCout */
1632 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 480});
1633 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 480, 1, 1});
1634 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 480, 120});
1635 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 480});
1636 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 480, 112});
1637 /********************** Bottleneck 12 ***********************/
1638 /* N H W KH KW PH PW S D G GCin GCout */
1639 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
1640 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 672, 1, 1});
1641 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 672, 168});
1642 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 168, 672});
1643 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 672, 112});
1644 /********************** Bottleneck 13 ***********************/
1645 /* N H W KH KW PH PW S D G GCin GCout */
1646//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
1647 b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 672, 1, 1});
1648 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 672, 160});
1649 /********************** Bottleneck 14 ***********************/
1650 /* N H W KH KW PH PW S D G GCin GCout */
1651 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1652 b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
1653 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
1654 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
1655 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1656 /********************** Bottleneck 15 ***********************/
1657 /* N H W KH KW PH PW S D G GCin GCout */
1658//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1659//b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
1660//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
1661//b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
1662//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1663 /************************ Last Stage ***********************/
1664 /* N H W KH KW PH PW S D G GCin GCout */
1665//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1666 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 1280});
1667 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1001});
1668}
1669
1670// SqueezeNet 1.0
1671static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
1672 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1673
1674 /************************** Conv 1 *************************/
1675 /* N H W KH KW PH PW S D G GCin GCout */
1676 b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 96});
1677 /************************** Fire 2 *************************/
1678 /* N H W KH KW PH PW S D G GCin GCout */
1679 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 96, 16});
1680 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1681 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1682 /************************** Fire 3 *************************/
1683 /* N H W KH KW PH PW S D G GCin GCout */
1684 b->Args({1, 56, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
1685//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1686//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1687 /************************** Fire 4 *************************/
1688 /* N H W KH KW PH PW S D G GCin GCout */
1689 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 32});
1690 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1691 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1692 /************************** Fire 5 *************************/
1693 /* N H W KH KW PH PW S D G GCin GCout */
1694 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
1695 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1696 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1697 /************************** Fire 6 *************************/
1698 /* N H W KH KW PH PW S D G GCin GCout */
1699 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 48});
1700 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1701 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1702 /************************** Fire 7 *************************/
1703 /* N H W KH KW PH PW S D G GCin GCout */
1704 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 48});
1705//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1706//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1707 /************************** Fire 8 *************************/
1708 /* N H W KH KW PH PW S D G GCin GCout */
1709 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1710 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1711 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1712 /************************** Fire 9 *************************/
1713 /* N H W KH KW PH PW S D G GCin GCout */
1714 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
1715 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1716 b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1717 /************************* Conv 10 *************************/
1718 /* N H W KH KW PH PW S D G GCin GCout */
1719 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
1720}
1721
1722// SqueezeNet 1.1
1723static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
1724 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1725
1726 /************************** Conv 1 *************************/
1727 /* N H W KH KW PH PW S D G GCin GCout */
1728 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 64});
1729 /************************** Fire 2 *************************/
1730 /* N H W KH KW PH PW S D G GCin GCout */
1731 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 64, 16});
1732 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1733 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1734 /************************** Fire 3 *************************/
1735 /* N H W KH KW PH PW S D G GCin GCout */
1736 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
1737//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1738//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1739 /************************** Fire 4 *************************/
1740 /* N H W KH KW PH PW S D G GCin GCout */
1741 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 128, 32});
1742 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1743 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1744 /************************** Fire 5 *************************/
1745 /* N H W KH KW PH PW S D G GCin GCout */
1746 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
1747//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1748//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1749 /************************** Fire 6 *************************/
1750 /* N H W KH KW PH PW S D G GCin GCout */
1751 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 256, 48});
1752 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1753 b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1754 /************************** Fire 7 *************************/
1755 /* N H W KH KW PH PW S D G GCin GCout */
1756 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 48});
1757//b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1758//b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1759 /************************** Fire 8 *************************/
1760 /* N H W KH KW PH PW S D G GCin GCout */
1761 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1762 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1763 b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1764 /************************** Fire 9 *************************/
1765 /* N H W KH KW PH PW S D G GCin GCout */
1766 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
1767//b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1768//b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1769 /************************* Conv 10 *************************/
1770 /* N H W KH KW PH PW S D G GCin GCout */
1771 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
1772}
1773
1774static void InceptionV3(benchmark::internal::Benchmark* b) {
1775 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1776
1777 /* N H W KH KW PH PW S D G GCin GCout */
1778 b->Args({1, 299, 299, 3, 3, 0, 0, 2, 1, 1, 3, 32});
1779 b->Args({1, 149, 149, 3, 3, 0, 0, 1, 1, 1, 32, 32});
1780 b->Args({1, 147, 147, 3, 3, 2, 2, 1, 1, 1, 32, 64});
1781 b->Args({1, 73, 73, 1, 1, 0, 0, 1, 1, 1, 64, 80});
1782 b->Args({1, 73, 73, 3, 3, 0, 0, 1, 1, 1, 80, 192});
1783 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 64});
1784 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 48});
1785 b->Args({1, 35, 35, 5, 5, 4, 4, 1, 1, 1, 48, 64});
1786 b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 64, 96});
1787 b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 96, 96});
1788 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1789 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 64});
1790 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 48});
1791 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 64});
1792 b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 48});
1793 b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 288, 384});
1794 b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 96, 96});
1795 b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 192});
1796 b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 128});
1797 b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 128});
1798 b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 192});
1799 b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 128});
1800 b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 192});
1801 b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 160});
1802 b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 160});
1803 b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 192});
1804 b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 160});
1805 b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 192});
1806 b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 192, 192});
1807 b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 192, 192});
1808 b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 320});
1809 b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 192});
1810 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 320});
1811 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 384});
1812 b->Args({1, 8, 8, 1, 3, 0, 2, 1, 1, 1, 384, 384});
1813 b->Args({1, 8, 8, 3, 1, 2, 0, 1, 1, 1, 384, 384});
1814 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 448});
1815 b->Args({1, 8, 8, 3, 3, 2, 2, 1, 1, 1, 448, 384});
1816 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 192});
1817 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 320});
1818 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 384});
1819 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 448});
1820 b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 192});
1821 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2048, 1001});
1822}
1823
1824static void ResNet18(benchmark::internal::Benchmark* b) {
1825 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1826
1827 /************************* Conv 1 *************************/
1828 /* N H W KH KW PH PW S D G GCin GCout */
1829 b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
1830 /************************ Conv 2.X ************************/
1831 /* N H W KH KW PH PW S D G GCin GCout */
1832 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
1833 /************************ Conv 3.X ************************/
1834 /* N H W KH KW PH PW S D G GCin GCout */
1835 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 64, 128});
1836 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
1837 b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 64, 128});
1838 /************************ Conv 4.X ************************/
1839 /* N H W KH KW PH PW S D G GCin GCout */
1840 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 128, 256});
1841 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
1842 b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 128, 256});
1843 /************************ Conv 5.X ************************/
1844 /* N H W KH KW PH PW S D G GCin GCout */
1845 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 256, 512});
1846 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1847 b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 256, 512});
1848}
1849
1850static void ResNet50(benchmark::internal::Benchmark* b) {
1851 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1852
1853 /************************* Conv 1 *************************/
1854 /* N H W KH KW PH PW S D G GCin GCout */
1855 b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
1856 /************************ Conv 2.1 ************************/
1857 /* N H W KH KW PH PW S D G GCin GCout */
1858 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 64});
1859 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
1860 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1861//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1862 /************************ Conv 2.X ************************/
1863 /* N H W KH KW PH PW S D G GCin GCout */
1864 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 64});
1865//b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
1866//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1867 /************************ Conv 3.1 ************************/
1868 /* N H W KH KW PH PW S D G GCin GCout */
1869 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 128});
1870 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 128, 128});
1871 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
1872 b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 256, 512});
1873 /************************ Conv 3.X ************************/
1874 /* N H W KH KW PH PW S D G GCin GCout */
1875 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 128});
1876 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
1877//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
1878 /************************ Conv 4.1 ************************/
1879 /* N H W KH KW PH PW S D G GCin GCout */
1880 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 256});
1881 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 256, 256});
1882 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
1883 b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 512, 1024});
1884 /************************ Conv 4.X ************************/
1885 /* N H W KH KW PH PW S D G GCin GCout */
1886 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 256});
1887 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
1888//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
1889 /************************ Conv 5.1 ************************/
1890 /* N H W KH KW PH PW S D G GCin GCout */
1891 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 512});
1892 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 512, 512});
1893 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
1894 b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 1024, 2048});
1895 /************************ Conv 5.X ************************/
1896 /* N H W KH KW PH PW S D G GCin GCout */
1897 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 2048, 512});
1898 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1899//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
1900}
1901
1902static void VGG(benchmark::internal::Benchmark* b) {
1903 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1904
1905 /************************* Conv 1.1 ************************/
1906 /* N H W KH KW PH PW S D G GCin GCout */
1907 b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 3, 64});
1908 /************************* Conv 1.2 ************************/
1909 /* N H W KH KW PH PW S D G GCin GCout */
1910 b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 64, 64});
1911
1912 /************************* Conv 2.1 ************************/
1913 /* N H W KH KW PH PW S D G GCin GCout */
1914 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 64, 128});
1915 /************************* Conv 2.2 ************************/
1916 /* N H W KH KW PH PW S D G GCin GCout */
1917 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 128, 128});
1918
1919 /************************* Conv 3.1 ************************/
1920 /* N H W KH KW PH PW S D G GCin GCout */
1921 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 128, 256});
1922 /************************* Conv 3.2 ************************/
1923 /* N H W KH KW PH PW S D G GCin GCout */
1924 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 256, 256});
1925 /************************* Conv 3.3 ************************/
1926 /* N H W KH KW PH PW S D G GCin GCout */
1927 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1928
1929 /************************* Conv 4.1 ************************/
1930 /* N H W KH KW PH PW S D G GCin GCout */
1931 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 256, 512});
1932 /************************* Conv 4.2 ************************/
1933 /* N H W KH KW PH PW S D G GCin GCout */
1934 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1935 /************************* Conv 4.3 ************************/
1936 /* N H W KH KW PH PW S D G GCin GCout */
1937 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1938
1939 /************************* Conv 5.X ************************/
1940 /* N H W KH KW PH PW S D G GCin GCout */
1941 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1942 /************************* Conv 5.3 ************************/
1943 /* N H W KH KW PH PW S D G GCin GCout */
1944 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1945}
1946
1947// SRCNN (9-1-5)
1948static void SRCNN915(benchmark::internal::Benchmark* b) {
1949 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1950
1951 /* N H W KH KW PH PW S D G GCin GCout */
1952 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1953 b->Args({1, 376, 376, 1, 1, 0, 0, 1, 1, 1, 64, 32});
1954 b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1955}
1956
1957// SRCNN (9-3-5)
1958static void SRCNN935(benchmark::internal::Benchmark* b) {
1959 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1960
1961 /* N H W KH KW PH PW S D G GCin GCout */
1962 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1963 b->Args({1, 376, 376, 3, 3, 0, 0, 1, 1, 1, 64, 32});
1964 b->Args({1, 374, 374, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1965}
1966
1967// SRCNN (9-5-5)
1968static void SRCNN955(benchmark::internal::Benchmark* b) {
1969 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1970
1971 /* N H W KH KW PH PW S D G GCin GCout */
1972 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1973 b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 64, 32});
1974 b->Args({1, 372, 372, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1975}
1976
// Registrations of xnnpack_convolution_f16; the whole group is compiled out
// when XNN_NO_F16_OPERATORS is defined.
// Each line registers one benchmark case named after a network, captures the
// quoted display name as an extra argument of the benchmark function, lets the
// generator passed to Apply() attach the argument sets (the SRCNN* generators
// in this file do so through ArgNames()/Args()), and selects wall-clock time
// via UseRealTime().
Chao Meic6640272020-07-23 09:35:11 -07001977#ifndef XNN_NO_F16_OPERATORS
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001978 BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
1979 BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
1980 BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
1981 BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
1982 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
1983 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
1984 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
1985 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
1986 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
1987 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
1988 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
1989 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
1990 BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
1991 BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
1992 BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
1993 BENCHMARK_CAPTURE(xnnpack_convolution_f16, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
1994 BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
1995 BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
1996 BENCHMARK_CAPTURE(xnnpack_convolution_f16, vgg, "VGG")->Apply(VGG)->UseRealTime();
1997 BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
1998 BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
1999 BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
Chao Meic6640272020-07-23 09:35:11 -07002000#endif // XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -07002001
// Registrations of xnnpack_convolution_f32; the whole group is compiled out
// when XNN_NO_F32_OPERATORS is defined.
// Each line registers one benchmark case named after a network, captures the
// quoted display name as an extra argument of the benchmark function, lets the
// generator passed to Apply() attach the argument sets (the SRCNN* generators
// in this file do so through ArgNames()/Args()), and selects wall-clock time
// via UseRealTime().
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002002#ifndef XNN_NO_F32_OPERATORS
2003 BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
2004 BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
2005 BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
2006 BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
2007 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
2008 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
2009 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
2010 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
2011 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
2012 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
2013 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
2014 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
2015 BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
2016 BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
2017 BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
2018 BENCHMARK_CAPTURE(xnnpack_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
2019 BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
2020 BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
2021 BENCHMARK_CAPTURE(xnnpack_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
2022 BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
2023 BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
2024 BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
2025#endif // XNN_NO_F32_OPERATORS
2026
// Registrations of xnnpack_convolution_qs8; the whole group is compiled out
// when XNN_NO_QS8_OPERATORS is defined.
// Each line registers one benchmark case named after a network, captures the
// quoted display name as an extra argument of the benchmark function, lets the
// generator passed to Apply() attach the argument sets (the SRCNN* generators
// in this file do so through ArgNames()/Args()), and selects wall-clock time
// via UseRealTime().
2027#ifndef XNN_NO_QS8_OPERATORS
2028 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
2029 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
2030 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
2031 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
2032 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
2033 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
2034 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
2035 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
2036 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
2037 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
2038 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
2039 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
2040 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
2041 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
2042 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
2043 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
2044 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
2045 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
2046 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, vgg, "VGG")->Apply(VGG)->UseRealTime();
2047 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
2048 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
2049 BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
2050#endif // XNN_NO_QS8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002051
// Registrations of xnnpack_convolution_qu8; the whole group is compiled out
// when XNN_NO_QU8_OPERATORS is defined, matching the guard around the
// function's definition at the top of the file.
// Each line registers one benchmark case named after a network and captures
// the quoted display name, which is received by the function's
// `const char* net` parameter. The generator passed to Apply() attaches the
// argument sets (the SRCNN* generators in this file do so through
// ArgNames()/Args()), and UseRealTime() selects wall-clock time.
Chao Meic6640272020-07-23 09:35:11 -07002052#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002053 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
2054 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
2055 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
2056 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
2057 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
2058 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
2059 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
2060 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
2061 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
2062 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
2063 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
2064 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
2065 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
2066 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
2067 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
2068 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
2069 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
2070 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
2071 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, vgg, "VGG")->Apply(VGG)->UseRealTime();
2072 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
2073 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
2074 BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
Chao Meic6640272020-07-23 09:35:11 -07002075#endif // XNN_NO_QU8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002076
// Registrations of tflite_convolution_f32; built only when
// BENCHMARK_TENSORFLOW_LITE is defined, the same macro that guards the
// TensorFlow Lite includes at the top of the file.
// The case names, display strings and Apply() generators are the same ones
// used for the xnnpack_convolution_* groups, and UseRealTime() selects
// wall-clock time.
2077#ifdef BENCHMARK_TENSORFLOW_LITE
2078 BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
2079 BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
2080 BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
2081 BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
2082 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
2083 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
2084 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
2085 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
2086 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
2087 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
2088 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
2089 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
2090 BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
2091 BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
2092 BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
2093 BENCHMARK_CAPTURE(tflite_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
2094 BENCHMARK_CAPTURE(tflite_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
2095 BENCHMARK_CAPTURE(tflite_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
2096 BENCHMARK_CAPTURE(tflite_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
2097 BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
2098 BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
2099 BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
2100#endif // BENCHMARK_TENSORFLOW_LITE
2101
// Registrations of armcl_convolution_f32; built only when
// BENCHMARK_ARM_COMPUTE_LIBRARY is defined, the same macro that guards the
// arm_compute includes at the top of the file.
// The case names, display strings and Apply() generators match the ones used
// for the other groups, and UseRealTime() selects wall-clock time.
// NOTE(review): unlike the xnnpack_* and tflite_* groups, this list has no
// mobilenet_v3_small / mobilenet_v3_large entries -- confirm whether the
// omission is intentional.
2102#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
2103 BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
2104 BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
2105 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
2106 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
2107 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
2108 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
2109 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
2110 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
2111 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
2112 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
2113 BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
2114 BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
2115 BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
2116 BENCHMARK_CAPTURE(armcl_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
2117 BENCHMARK_CAPTURE(armcl_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
2118 BENCHMARK_CAPTURE(armcl_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
2119 BENCHMARK_CAPTURE(armcl_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
2120 BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
2121 BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
2122 BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
2123#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
2124
// Expands BENCHMARK_MAIN() (Google Benchmark's macro that defines the
// program's main) unless XNNPACK_BENCHMARK_NO_MAIN is defined.
// NOTE(review): presumably the opt-out exists so a build can supply its own
// main when linking this file with others -- confirm against the build files.
2125#ifndef XNNPACK_BENCHMARK_NO_MAIN
2126BENCHMARK_MAIN();
2127#endif