// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <ostream>
#include <random>
#include <sstream>
#include <string>
#include <vector>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack.h>
20
Frank Barchardbb4c18b2019-09-30 11:05:52 -070021#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
22#include "arm_compute/core/Types.h"
23#include "arm_compute/runtime/Tensor.h"
24#include "arm_compute/runtime/CPP/CPPScheduler.h"
25#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
26#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
27#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
XNNPACK Teamb455b122019-09-27 18:10:33 -070028#include <benchmark/benchmark.h>
Frank Barchard49b4dcc2020-06-26 14:07:19 -070029#include <fp16.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070030#ifdef BENCHMARK_TENSORFLOW_LITE
31#include "flatbuffers/include/flatbuffers/flatbuffers.h"
32#include "tensorflow/lite/interpreter.h"
33#include "tensorflow/lite/kernels/register.h"
34#include "tensorflow/lite/model.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070035#include "tensorflow/lite/schema/schema_generated.h"
36#include "tensorflow/lite/version.h"
37#endif // BENCHMARK_TENSORFLOW_LITE
Frank Barchardbb4c18b2019-09-30 11:05:52 -070038#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070039
Chao Meic6640272020-07-23 09:35:11 -070040#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan08b7a972020-07-14 18:17:29 -070041void xnnpack_convolution_qu8(benchmark::State& state, const char* net) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070042 const size_t batch_size = state.range(0);
43 const size_t input_height = state.range(1);
44 const size_t input_width = state.range(2);
45 const size_t kernel_height = state.range(3);
46 const size_t kernel_width = state.range(4);
47 const size_t padding_height = state.range(5);
48 const size_t padding_width = state.range(6);
49 const size_t subsampling = state.range(7);
50 const size_t dilation = state.range(8);
51 const size_t groups = state.range(9);
52 const size_t group_input_channels = state.range(10);
53 const size_t group_output_channels = state.range(11);
54
55 std::random_device random_device;
56 auto rng = std::mt19937(random_device());
57 auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
Marat Dukhan5ce30d92020-04-14 03:31:26 -070058 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -070059
60 const size_t output_pixel_stride = groups * group_output_channels;
61 const size_t input_pixel_stride = groups * group_input_channels;
62 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
63 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
64 const size_t padding_left = padding_width / 2;
65 const size_t padding_top = padding_height / 2;
66 const size_t padding_right = padding_width - padding_left;
67 const size_t padding_bottom = padding_height - padding_top;
68 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
69 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
70
71 std::vector<uint8_t> input(batch_size * input_height * input_width * input_pixel_stride);
72 std::generate(input.begin(), input.end(), std::ref(u8rng));
73 std::vector<uint8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
74 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
75 std::vector<int32_t> bias(groups * group_output_channels);
76 std::generate(bias.begin(), bias.end(), std::ref(s32rng));
77 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
78
Marat Dukhan04f03be2019-11-19 12:36:47 -080079 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070080 if (status != xnn_status_success) {
81 state.SkipWithError("failed to initialize XNNPACK");
82 return;
83 }
84
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070086 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070087 sizeof(uint8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(uint8_t) * output_elements);
88 std::vector<uint8_t> output(output_elements * num_buffers);
89
90 std::vector<xnn_operator_t> convolution_operators(num_buffers);
91 for (xnn_operator_t& convolution_op : convolution_operators) {
Marat Dukhan08b7a972020-07-14 18:17:29 -070092 status = xnn_create_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 padding_top, padding_right, padding_bottom, padding_left,
94 kernel_height, kernel_width,
95 subsampling, subsampling,
96 dilation, dilation,
97 groups, group_input_channels, group_output_channels,
98 input_pixel_stride, output_pixel_stride,
99 127, 0.5f,
100 127, 0.5f,
101 kernel.data(), bias.data(),
102 127, 0.5f, 0, 255,
103 0 /* flags */, &convolution_op);
104 if (status != xnn_status_success) {
105 state.SkipWithError("failed to create QINT8 Convolution operator");
106 return;
107 }
108 }
109
110 for (size_t i = 0; i < convolution_operators.size(); i++) {
Marat Dukhan08b7a972020-07-14 18:17:29 -0700111 status = xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700112 convolution_operators[i],
113 batch_size, input_height, input_width,
114 input.data(), output.data() + i * output_elements,
115 nullptr /* thread pool */);
116 if (status != xnn_status_success) {
117 state.SkipWithError("failed to setup QINT8 Convolution operator");
118 return;
119 }
120 }
121
122 size_t buffer_index = 0;
123 for (auto _ : state) {
124 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700125 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700126 buffer_index = (buffer_index + 1) % num_buffers;
127 state.ResumeTiming();
128
129 status = xnn_run_operator(convolution_operators[buffer_index],
130 nullptr /* thread pool */);
131 if (status != xnn_status_success) {
132 state.SkipWithError("failed to run QINT8 Convolution operator");
133 return;
134 }
135 }
136
137 for (xnn_operator_t& convolution_op : convolution_operators) {
138 status = xnn_delete_operator(convolution_op);
139 if (status != xnn_status_success) {
140 state.SkipWithError("failed to delete QINT8 Convolution operator");
141 return;
142 }
143 convolution_op = nullptr;
144 }
145
Frank Barchardbb4c18b2019-09-30 11:05:52 -0700146 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700147 state.counters["OPS"] = benchmark::Counter(
148 uint64_t(state.iterations()) * 2 *
149 batch_size * output_height * output_width *
150 groups * group_input_channels * group_output_channels *
151 kernel_height * kernel_width,
152 benchmark::Counter::kIsRate);
153}
Chao Meic6640272020-07-23 09:35:11 -0700154#endif // XNN_NO_QU8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700155
Chao Meic6640272020-07-23 09:35:11 -0700156#ifndef XNN_NO_F16_OPERATORS
// Benchmarks XNNPACK's FP16 (IEEE half precision, stored as uint16_t bit
// patterns) NHWC convolution operator.
// Benchmark arguments (state.range): batch size, input H, input W, kernel H,
// kernel W, padding H, padding W, subsampling (stride), dilation, groups,
// per-group input channels, per-group output channels.
void xnnpack_convolution_f16(benchmark::State& state, const char* net) {
  // Skip on CPUs without NEON FP16 arithmetic support.
  if (!benchmark::utils::CheckNEONFP16ARITH(state)) {
    return;
  }
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Random FP16 data: floats in [0.1, 1.0] converted to half bit patterns.
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
  auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

  // Derived convolution geometry; the "effective" kernel accounts for
  // dilation. Odd total padding puts the extra element on the right/bottom.
  const size_t output_pixel_stride = groups * group_output_channels;
  const size_t input_pixel_stride = groups * group_input_channels;
  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  // Input is over-allocated by XNN_EXTRA_BYTES: XNNPACK micro-kernels may
  // read (but not use) a few bytes past the last element.
  std::vector<uint16_t> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(uint16_t));
  std::generate(input.begin(), input.end(), std::ref(f16rng));
  std::vector<uint16_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
  std::vector<uint16_t> bias(groups * group_output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(f16rng));
  const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  // Rotate through enough operator/output pairs that one iteration's working
  // set cannot stay resident in the last-level cache for the next iteration.
  const size_t num_buffers = 1 +
    benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
      sizeof(uint16_t) * (kernel.size() + bias.size() + output_elements));
  std::vector<uint16_t> output(output_elements * num_buffers);

  // One operator per buffer; creation packs the weights.
  std::vector<xnn_operator_t> convolution_operators(num_buffers);
  for (xnn_operator_t& convolution_op : convolution_operators) {
    status = xnn_create_convolution2d_nhwc_f16(
      padding_top, padding_right, padding_bottom, padding_left,
      kernel_height, kernel_width,
      subsampling, subsampling,
      dilation, dilation,
      groups, group_input_channels, group_output_channels,
      input_pixel_stride, output_pixel_stride,
      kernel.data(), bias.data(),
      -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
      0 /* flags */, &convolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to create FP16 Convolution operator");
      return;
    }
  }

  // Setup happens once, outside the timed loop; operator i writes to its own
  // slice of the shared output buffer.
  for (size_t i = 0; i < convolution_operators.size(); i++) {
    status = xnn_setup_convolution2d_nhwc_f16(
      convolution_operators[i],
      batch_size, input_height, input_width,
      input.data(), output.data() + i * output_elements,
      nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to setup FP16 Convolution operator");
      return;
    }
  }

  size_t buffer_index = 0;
  for (auto _ : state) {
    // Prefetch the input and advance the buffer while the timer is paused,
    // so only the operator invocation itself is measured.
    state.PauseTiming();
    benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint16_t));
    buffer_index = (buffer_index + 1) % num_buffers;
    state.ResumeTiming();

    status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run FP16 Convolution operator");
      return;
    }
  }

  for (xnn_operator_t& convolution_op : convolution_operators) {
    status = xnn_delete_operator(convolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to delete FP16 Convolution operator");
      return;
    }
    convolution_op = nullptr;
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  // Each multiply-accumulate counts as 2 FLOPs.
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * output_height * output_width *
      groups * group_input_channels * group_output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}
Chao Meic6640272020-07-23 09:35:11 -0700270#endif // XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -0700271
XNNPACK Teamb455b122019-09-27 18:10:33 -0700272void xnnpack_convolution_f32(benchmark::State& state, const char* net) {
273 const size_t batch_size = state.range(0);
274 const size_t input_height = state.range(1);
275 const size_t input_width = state.range(2);
276 const size_t kernel_height = state.range(3);
277 const size_t kernel_width = state.range(4);
278 const size_t padding_height = state.range(5);
279 const size_t padding_width = state.range(6);
280 const size_t subsampling = state.range(7);
281 const size_t dilation = state.range(8);
282 const size_t groups = state.range(9);
283 const size_t group_input_channels = state.range(10);
284 const size_t group_output_channels = state.range(11);
285
286 std::random_device random_device;
287 auto rng = std::mt19937(random_device());
288 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
289
290 const size_t output_pixel_stride = groups * group_output_channels;
291 const size_t input_pixel_stride = groups * group_input_channels;
292 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
293 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
294 const size_t padding_left = padding_width / 2;
295 const size_t padding_top = padding_height / 2;
296 const size_t padding_right = padding_width - padding_left;
297 const size_t padding_bottom = padding_height - padding_top;
298 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
299 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
300
301 std::vector<float> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
302 std::generate(input.begin(), input.end(), std::ref(f32rng));
303 std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
304 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
305 std::vector<float> bias(groups * group_output_channels);
306 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
307 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
308
Marat Dukhan04f03be2019-11-19 12:36:47 -0800309 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700310 if (status != xnn_status_success) {
311 state.SkipWithError("failed to initialize XNNPACK");
312 return;
313 }
314
XNNPACK Teamb455b122019-09-27 18:10:33 -0700315 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -0700316 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700317 sizeof(float) * (kernel.size() + bias.size() + output_elements));
318 std::vector<float> output(output_elements * num_buffers);
319
320 std::vector<xnn_operator_t> convolution_operators(num_buffers);
321 for (xnn_operator_t& convolution_op : convolution_operators) {
322 status = xnn_create_convolution2d_nhwc_f32(
323 padding_top, padding_right, padding_bottom, padding_left,
324 kernel_height, kernel_width,
325 subsampling, subsampling,
326 dilation, dilation,
327 groups, group_input_channels, group_output_channels,
328 input_pixel_stride, output_pixel_stride,
329 kernel.data(), bias.data(),
330 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
331 0 /* flags */, &convolution_op);
332 if (status != xnn_status_success) {
333 state.SkipWithError("failed to create FP32 Convolution operator");
334 return;
335 }
336 }
337
338 for (size_t i = 0; i < convolution_operators.size(); i++) {
339 status = xnn_setup_convolution2d_nhwc_f32(
340 convolution_operators[i],
341 batch_size, input_height, input_width,
342 input.data(), output.data() + i * output_elements,
343 nullptr /* thread pool */);
344 if (status != xnn_status_success) {
345 state.SkipWithError("failed to setup FP32 Convolution operator");
346 return;
347 }
348 }
349
350 size_t buffer_index = 0;
351 for (auto _ : state) {
352 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700353 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700354 buffer_index = (buffer_index + 1) % num_buffers;
355 state.ResumeTiming();
356
357 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
358 if (status != xnn_status_success) {
359 state.SkipWithError("failed to run FP32 Convolution operator");
360 return;
361 }
362 }
363
364 for (xnn_operator_t& convolution_op : convolution_operators) {
365 status = xnn_delete_operator(convolution_op);
366 if (status != xnn_status_success) {
367 state.SkipWithError("failed to delete FP32 Convolution operator");
368 return;
369 }
370 convolution_op = nullptr;
371 }
372
373 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
374 state.counters["FLOPS"] = benchmark::Counter(
375 uint64_t(state.iterations()) * 2 *
376 batch_size * output_height * output_width *
377 groups * group_input_channels * group_output_channels *
378 kernel_height * kernel_width,
379 benchmark::Counter::kIsRate);
380}
381
382#ifdef BENCHMARK_TENSORFLOW_LITE
// Benchmarks TensorFlow Lite's FP32 convolution for comparison against
// XNNPACK: builds a single-operator flatbuffer model in memory (CONV_2D, or
// DEPTHWISE_CONV_2D for depthwise cases) and times Interpreter::Invoke().
void tflite_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  // TFLite has no generic grouped convolution: only depthwise
  // (group_input_channels == 1) or dense (groups == 1) can be expressed.
  bool is_depthwise = false;
  if (groups != 1) {
    if (group_input_channels == 1) {
      is_depthwise = true;
    } else {
      state.SkipWithError("grouped convolution is not supported");
      return;
    }
  }

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;

  // Map the explicit padding onto TFLite's SAME/VALID enum; any other
  // padding combination cannot be expressed in the schema.
  tflite::Padding padding = tflite::Padding_VALID;
  if (padding_width == (effective_kernel_width - 1) && padding_height == (effective_kernel_height - 1)) {
    padding = tflite::Padding_SAME;
  } else if (padding_width == 0 && padding_height == 0) {
    padding = tflite::Padding_VALID;
  } else {
    state.SkipWithError("unsupported padding");
    return;
  }

  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  // Filter and bias are baked into the model as constant buffers below.
  std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
  std::vector<float> bias(groups * group_output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(f32rng));

  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<tflite::OperatorCode> operator_code =
      CreateOperatorCode(
          builder,
          is_depthwise ? tflite::BuiltinOperator_DEPTHWISE_CONV_2D : tflite::BuiltinOperator_CONV_2D,
          0);

  // Both option tables are built unconditionally; only one is attached to
  // the operator below.
  flatbuffers::Offset<tflite::Conv2DOptions> conv2d_options = CreateConv2DOptions(
      builder,
      padding,
      static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
      tflite::ActivationFunctionType_NONE,
      static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));

  flatbuffers::Offset<tflite::DepthwiseConv2DOptions> dwconv2d_options = CreateDepthwiseConv2DOptions(
      builder,
      padding,
      static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
      static_cast<int32_t>(group_output_channels) /* depth multiplier */,
      tflite::ActivationFunctionType_NONE,
      static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));

  // Buffer 0 is the conventional empty buffer for non-constant tensors;
  // buffers 1 and 2 hold the constant filter and bias data.
  flatbuffers::Offset<tflite::Buffer> buffers[3] = {
    tflite::CreateBuffer(builder, builder.CreateVector({})),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(kernel.data()),
      sizeof(float) * kernel.size())),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(bias.data()),
      sizeof(float) * bias.size())),
  };

  const int32_t input_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(input_height),
    static_cast<int32_t>(input_width),
    static_cast<int32_t>(groups * group_input_channels)
  };
  const int32_t output_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(output_height),
    static_cast<int32_t>(output_width),
    static_cast<int32_t>(groups * group_output_channels)
  };
  const int32_t filter_shape[4] = {
    static_cast<int32_t>(group_output_channels),
    static_cast<int32_t>(kernel_height),
    static_cast<int32_t>(kernel_width),
    static_cast<int32_t>(groups * group_input_channels)
  };
  const int32_t bias_shape[1] = {
    static_cast<int32_t>(groups * group_output_channels)
  };

  // Tensor indices: 0 = input, 1 = filter, 2 = bias, 3 = output.
  flatbuffers::Offset<tflite::Tensor> tensors[4] = {
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(input_shape, 4),
                         tflite::TensorType_FLOAT32,
                         0 /* buffer id */,
                         builder.CreateString("input")),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(filter_shape, 4),
                         tflite::TensorType_FLOAT32,
                         1 /* buffer id */,
                         builder.CreateString("filter")),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(bias_shape, 1),
                         tflite::TensorType_FLOAT32,
                         2 /* buffer id */,
                         builder.CreateString("bias")),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(output_shape, 4),
                         tflite::TensorType_FLOAT32,
                         0 /* buffer id */,
                         builder.CreateString("output")),
  };

  const int32_t op_inputs[3] = { 0, 1, 2 };
  const int32_t op_outputs[1] = { 3 };
  flatbuffers::Offset<tflite::Operator> op = CreateOperator(
      builder,
      0 /* opcode_index */,
      builder.CreateVector<int32_t>(op_inputs, 3),
      builder.CreateVector<int32_t>(op_outputs, 1),
      is_depthwise ? tflite::BuiltinOptions_DepthwiseConv2DOptions : tflite::BuiltinOptions_Conv2DOptions,
      is_depthwise ? dwconv2d_options.Union() : conv2d_options.Union(),
      /*custom_options */ 0,
      tflite::CustomOptionsFormat_FLEXBUFFERS);

  const int32_t graph_inputs[1] = { 0 };
  const int32_t graph_outputs[1] = { 3 };
  flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
      builder,
      builder.CreateVector(tensors, 4),
      builder.CreateVector<int32_t>(graph_inputs, 1),
      builder.CreateVector<int32_t>(graph_outputs, 1),
      builder.CreateVector(&op, 1),
      builder.CreateString("Conv2D subgraph"));

  flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("Conv2D model");

  flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
      TFLITE_SCHEMA_VERSION,
      builder.CreateVector(&operator_code, 1),
      builder.CreateVector(&subgraph, 1),
      description,
      builder.CreateVector(buffers, 3));

  builder.Finish(model_buffer);

  // Single-threaded interpreter so the comparison against single-threaded
  // XNNPACK runs is apples-to-apples.
  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Fill the input tensor (tensor index 0) with random data once, up front.
  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size * groups * group_input_channels * input_height * input_width,
    std::ref(f32rng));

  for (auto _ : state) {
    // Untimed: wipe caches, then warm only the input, so each Invoke() sees
    // a comparable cache state.
    state.PauseTiming();
    benchmark::utils::WipeCache();
    benchmark::utils::PrefetchToL1(
      interpreter->typed_tensor<float>(0),
      batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  // Each multiply-accumulate counts as 2 FLOPs.
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
    batch_size * output_height * output_width *
    groups * group_input_channels * group_output_channels *
    kernel_height * kernel_width,
    benchmark::Counter::kIsRate);

  interpreter.reset();
}
590#endif // BENCHMARK_TENSORFLOW_LITE
591
592#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
// Compares `output` against a naive reference convolution computed from the
// same input/kernel/bias, using the geometry encoded in the benchmark state.
// Returns an empty string on success, or a human-readable description of the
// first element exceeding the error tolerance.
// Layouts (as indexed below): input is NHWC, kernel is [O][H][W][G*I],
// output is NHWC with channels grouped as [G][O-per-group].
static std::string compare_with_convolution_f32_reference_output(
  const benchmark::State& state, const float* input, size_t input_size,
  const float* kernel, size_t kernel_size, const float* bias, size_t bias_size,
  const float* output, size_t output_size)
{
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
  const size_t input_pixel_stride = groups * group_input_channels;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;

  // Sanity-check that the caller's buffer sizes match the geometry.
  assert(input_size == batch_size * input_height * input_width * groups * group_input_channels);

  assert(kernel_size == group_output_channels * kernel_height * kernel_width * groups * group_input_channels);

  assert(bias_size == groups * group_output_channels);

  assert(output_size == batch_size * output_height * output_width * groups * group_output_channels);

  // Pass 1: initialize every reference output element with its bias.
  std::vector<float> output_ref(output_size);
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t g = 0; g < groups; g++) {
          for (size_t oc = 0; oc < group_output_channels; oc++) {
            output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] =
              bias[g * group_output_channels + oc];
          }
        }
      }
    }
  }
  // Pass 2: accumulate the convolution sums.
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t ky = 0; ky < kernel_height; ky++) {
          // iy/ix are size_t: a "negative" coordinate (inside the padding)
          // wraps to a huge value, so the single `< input_height/width`
          // comparison rejects both out-of-range directions at once.
          const size_t iy = oy * subsampling + ky * dilation - padding_top;
          if (iy < input_height) {
            for (size_t kx = 0; kx < kernel_width; kx++) {
              const size_t ix = ox * subsampling + kx * dilation - padding_left;
              if (ix < input_width) {
                for (size_t g = 0; g < groups; g++) {
                  for (size_t oc = 0; oc < group_output_channels; oc++) {
                    for (size_t ic = 0; ic < group_input_channels; ic++) {
                      output_ref[(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
                        input[((i * input_height + iy) * input_width + ix) * input_pixel_stride + g * group_input_channels + ic] *
                        kernel[(((oc * kernel_height + ky) * kernel_width + kx) * groups + g) * group_input_channels + ic];
                    } // group_input_channels loop
                  } // group_output_channels loop
                } // groups loop
              }
            } // kernel_width loop
          }
        } // kernel_height loop
      } // output_width loop
    } // output_height loop
  } // batch_size loop

  // Pass 3: compare, allowing relative error (floored at machine epsilon
  // for reference values near zero).
  const float relative_error_tolerance = 1e-4;
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t y = 0; y < output_height; y++) {
      for (size_t x = 0; x < output_width; x++) {
        for (size_t g = 0; g < groups; g++) {
          for (size_t c = 0; c < group_output_channels; c++) {
            const size_t idx = (((i * output_height + y) * output_width + x) * groups + g) * group_output_channels + c;
            const float value_ref = output_ref[idx];
            const float value = output[idx];
            if (std::abs(value - value_ref) > std::max(std::abs(value_ref) * relative_error_tolerance, std::numeric_limits<float>::epsilon())) {
              std::ostringstream error_stream;
              error_stream << "(x, y) = (" << x << ", " << y << "), group = " << g
                << ", channel = " << c << ", refValue = " << value_ref
                << ", actualValue = " << value
                << ", absDiff=" << std::abs(value - value_ref);
              return error_stream.str();
            }
          }
        }
      }
    }
  }
  return "";
}
690
// Benchmarks 32-bit floating-point 2D convolution through the Arm Compute
// Library (ACL) NEON backend, then cross-checks the output against this
// file's reference implementation (compare_with_convolution_f32_reference_output).
// The 'net' parameter is part of the common benchmark-function signature and
// is not referenced inside this function.
//
// Benchmark arguments, in state.range() order:
//   0:N batch, 1:H input height, 2:W input width, 3:KH/4:KW kernel size,
//   5:PH/6:PW total padding, 7:S stride (subsampling), 8:D dilation,
//   9:G groups, 10:GCin input channels/group, 11:GCout output channels/group.
void armcl_convolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t subsampling = state.range(7);
  const size_t dilation = state.range(8);
  const size_t groups = state.range(9);
  const size_t group_input_channels = state.range(10);
  const size_t group_output_channels = state.range(11);

  // Kernel extent after dilation; output size uses floor division, matching
  // the FLOOR rounding requested from ACL below.
  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  // Split total padding between the two edges; the odd pixel (if any) goes to
  // the right/bottom edge.
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
  const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;

  arm_compute::PadStrideInfo pad_stride_info(
    subsampling /* stride height */,
    subsampling /* stride width */,
    padding_left, padding_right, padding_top, padding_bottom,
    arm_compute::DimensionRoundingType::FLOOR);
  arm_compute::Size2D dilation_info(dilation, dilation);
  // Note: activation is disabled by default.
  arm_compute::ActivationLayerInfo activation_info;

  // Note: no batch size and reverse order of dimensions, i.e. CWHN for NHWC.
  arm_compute::TensorShape input_shape(
    /* C */ groups * group_input_channels,
    /* W */ input_width,
    /* H */ input_height,
    /* N */ batch_size);
  arm_compute::TensorInfo input_info(
    input_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  input_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor input_tensor;
  input_tensor.allocator()->init(input_info);
  input_tensor.allocator()->allocate();

  // Note: reverse order of dimensions, i.e. for IWHO for OHWI.
  arm_compute::TensorShape kernel_shape(
    /* I */ groups * group_input_channels,
    /* W */ kernel_width,
    /* H */ kernel_height,
    /* O */ group_output_channels);
  arm_compute::TensorInfo kernel_info(
    kernel_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  kernel_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor kernelTensor;
  kernelTensor.allocator()->init(kernel_info);
  kernelTensor.allocator()->allocate();

  // Bias is one value per output channel across all groups.
  arm_compute::TensorShape bias_shape(groups * group_output_channels);
  arm_compute::TensorInfo bias_info(
    bias_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  bias_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor bias_tensor;
  bias_tensor.allocator()->init(bias_info);
  bias_tensor.allocator()->allocate();

  // Note: no batch size and reverse order of dimensions, i.e. CWHN for NHWC.
  arm_compute::TensorShape output_shape(
    /* C */ groups * group_output_channels,
    /* W */ output_width,
    /* H */ output_height,
    /* N */ batch_size);
  arm_compute::TensorInfo output_info(
    output_shape,
    1 /* number of channels per element (!) */,
    arm_compute::DataType::F32);
  output_info.set_data_layout(arm_compute::DataLayout::NHWC);
  arm_compute::Tensor output_tensor;
  output_tensor.allocator()->init(output_info);
  output_tensor.allocator()->allocate();

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

  // Fill every tensor — including the output — with random values in [0, 1).
  std::generate(
    reinterpret_cast<float*>(input_tensor.buffer()),
    reinterpret_cast<float*>(input_tensor.buffer()) + input_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(kernelTensor.buffer()),
    reinterpret_cast<float*>(kernelTensor.buffer()) + kernel_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(bias_tensor.buffer()),
    reinterpret_cast<float*>(bias_tensor.buffer()) + bias_shape.total_size(),
    std::ref(f32rng));
  std::generate(
    reinterpret_cast<float*>(output_tensor.buffer()),
    reinterpret_cast<float*>(output_tensor.buffer()) + output_shape.total_size(),
    std::ref(f32rng));

  bool is_depthwise = false;
  if (groups != 1) {
    // NEConvolutionLayer uses NEGEMMConvolutionLayer by default, which doesn't support grouped convolution.
    // However, depthwise convolution is supported via NEDepthwiseConvolutionLayer.
    if (group_input_channels == 1) {
      is_depthwise = true;
    } else {
      state.SkipWithError("grouped convolution is not supported");
      return;
    }
  }

  // The configured ACL operator; kept alive for the whole benchmark run.
  std::shared_ptr<arm_compute::IFunction> layer;
  if (is_depthwise) {
    if (dilation != 1) {
      state.SkipWithError("dilated depthwise convolution is not supported");
      return;
    }

    // Avoid NEDepthwiseConvolutionLayer3x3 when stride isn't 2 in order to pass the output verification.
    // TODO(b/130206370) This looks like a bug and needs further investigation.
    if (kernel_height == 3 && kernel_width == 3 && subsampling == 2) {
      auto* depthwise_3x3_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer3x3();
      layer.reset(depthwise_3x3_convolution_layer);
      depthwise_3x3_convolution_layer->configure(
        &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
        pad_stride_info, group_output_channels, activation_info);

      // Skip (rather than fail) configurations ACL rejects.
      if (!depthwise_3x3_convolution_layer->validate(
          &input_info, &kernel_info, &bias_info, &output_info,
          pad_stride_info, group_output_channels, activation_info))
      {
        state.SkipWithError("validation failed");
        return;
      }
    } else {
      auto* depthwise_convolution_layer = new arm_compute::NEDepthwiseConvolutionLayer();
      layer.reset(depthwise_convolution_layer);
      depthwise_convolution_layer->configure(
        &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
        pad_stride_info, group_output_channels, activation_info);

      if (!depthwise_convolution_layer->validate(
          &input_info, &kernel_info, &bias_info, &output_info,
          pad_stride_info, group_output_channels, activation_info))
      {
        state.SkipWithError("validation failed");
        return;
      }
    }
  } else {
    auto* convolution_layer = new arm_compute::NEConvolutionLayer();
    layer.reset(convolution_layer);
    convolution_layer->configure(
      &input_tensor, &kernelTensor, &bias_tensor, &output_tensor,
      pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
      true /* enable fast math */, groups);

    if (!convolution_layer->validate(
        &input_info, &kernel_info, &bias_info, &output_info,
        pad_stride_info, arm_compute::WeightsInfo(), dilation_info, activation_info,
        true /* enable fast math */, groups))
    {
      state.SkipWithError("validation failed");
      return;
    }
  }

  // Dry run to let ACL do one-time initializations.
  arm_compute::CPPScheduler::get().set_num_threads(1);
  layer->run();

  for (auto _ : state) {
    state.PauseTiming();
    // Put caches into a reproducible state outside the timed region.
    benchmark::utils::WipeCache();
    benchmark::utils::PrefetchToL1(
      input_tensor.buffer(),
      batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    layer->run();
  }

  // Validate outputs.
  const std::string error_string = compare_with_convolution_f32_reference_output(
    state, reinterpret_cast<const float*>(input_tensor.buffer()),
    input_shape.total_size(),
    reinterpret_cast<const float*>(kernelTensor.buffer()),
    kernel_shape.total_size(),
    reinterpret_cast<const float*>(bias_tensor.buffer()),
    bias_shape.total_size(),
    reinterpret_cast<const float*>(output_tensor.buffer()),
    output_shape.total_size());

  if (!error_string.empty()) {
    state.SkipWithError(("validation failed: " + error_string).c_str());
    return;
  }

  input_tensor.allocator()->free();
  kernelTensor.allocator()->free();
  bias_tensor.allocator()->free();
  output_tensor.allocator()->free();

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
  // 2 FLOPs (multiply + accumulate) per MAC in the convolution.
  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
    batch_size * output_height * output_width *
    groups * group_input_channels * group_output_channels *
    kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}
911#endif // BENCHMARK_ARM_COMPUTE_LIBRARY
912
913// ShuffleNet v1 with 1 group.
914static void ShuffleNetV1G1(benchmark::internal::Benchmark* b) {
915 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
916
917 /*************************** Conv 1 **************************/
918 /* N H W KH KW PH PW S D G GCin GCout */
919 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
920 /******************* Stage 2: stride-2 unit ******************/
921 /* N H W KH KW PH PW S D G GCin GCout */
922 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 36});
923 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 36, 1, 1});
924 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 120});
925 /******************* Stage 2: stride-1 units *****************/
926 /* N H W KH KW PH PW S D G GCin GCout */
927 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 36});
928 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 36, 1, 1});
929 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 144});
930 /******************* Stage 3: stride-2 unit ******************/
931 /* N H W KH KW PH PW S D G GCin GCout */
932 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 72});
933 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 72, 1, 1});
934 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 144});
935 /******************* Stage 3: stride-1 units *****************/
936 /* N H W KH KW PH PW S D G GCin GCout */
937 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 72});
938 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 72, 1, 1});
939 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 288});
940 /******************* Stage 4: stride-2 unit ******************/
941 /* N H W KH KW PH PW S D G GCin GCout */
942 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 144});
943 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 144, 1, 1});
944 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 288});
945 /******************* Stage 4: stride-1 units *****************/
946 /* N H W KH KW PH PW S D G GCin GCout */
947 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 144});
948 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 144, 1, 1});
949 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 576});
950}
951
952// ShuffleNet v1 with 2 groups.
953static void ShuffleNetV1G2(benchmark::internal::Benchmark* b) {
954 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
955
956 /*************************** Conv 1 **************************/
957 /* N H W KH KW PH PW S D G GCin GCout */
958 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
959 /******************* Stage 2: stride-2 unit ******************/
960 /* N H W KH KW PH PW S D G GCin GCout */
961 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 50});
962 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 50, 1, 1});
963 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 88});
964 /******************* Stage 2: stride-1 units *****************/
965 /* N H W KH KW PH PW S D G GCin GCout */
966 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 25});
967 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 50, 1, 1});
968 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 100});
969 /******************* Stage 3: stride-2 unit ******************/
970 /* N H W KH KW PH PW S D G GCin GCout */
971 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 50});
972 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 100, 1, 1});
973 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 100});
974 /******************* Stage 3: stride-1 units *****************/
975 /* N H W KH KW PH PW S D G GCin GCout */
976 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 50});
977 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 100, 1, 1});
978 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 200});
979 /******************* Stage 4: stride-2 unit ******************/
980 /* N H W KH KW PH PW S D G GCin GCout */
981 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 100});
982 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 200, 1, 1});
983 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 200});
984 /******************* Stage 4: stride-1 units *****************/
985 /* N H W KH KW PH PW S D G GCin GCout */
986 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 400, 100});
987 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 200, 1, 1});
988 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 400});
989}
990
991// ShuffleNet v1 with 3 groups.
992static void ShuffleNetV1G3(benchmark::internal::Benchmark* b) {
993 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
994
995 /*************************** Conv 1 **************************/
996 /* N H W KH KW PH PW S D G GCin GCout */
997 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
998 /******************* Stage 2: stride-2 unit ******************/
999 /* N H W KH KW PH PW S D G GCin GCout */
1000 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 60});
1001 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 60, 1, 1});
1002 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 72});
1003 /******************* Stage 2: stride-1 units *****************/
1004 /* N H W KH KW PH PW S D G GCin GCout */
1005 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 20});
1006 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 60, 1, 1});
1007 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 80});
1008 /******************* Stage 3: stride-2 unit ******************/
1009 /* N H W KH KW PH PW S D G GCin GCout */
1010 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 40});
1011 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 120, 1, 1});
1012 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 80});
1013 /******************* Stage 3: stride-1 units *****************/
1014 /* N H W KH KW PH PW S D G GCin GCout */
1015 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 40});
1016 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 120, 1, 1});
1017 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 160});
1018 /******************* Stage 4: stride-2 unit ******************/
1019 /* N H W KH KW PH PW S D G GCin GCout */
1020 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 80});
1021 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1022 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 160});
1023 /******************* Stage 4: stride-1 units *****************/
1024 /* N H W KH KW PH PW S D G GCin GCout */
1025 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 320, 80});
1026 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 240, 1, 1});
1027 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 320});
1028}
1029
1030// ShuffleNet v1 with 4 groups.
1031static void ShuffleNetV1G4(benchmark::internal::Benchmark* b) {
1032 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1033
1034 /*************************** Conv 1 **************************/
1035 /* N H W KH KW PH PW S D G GCin GCout */
1036 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1037 /******************* Stage 2: stride-2 unit ******************/
1038 /* N H W KH KW PH PW S D G GCin GCout */
1039 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 68});
1040 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 68, 1, 1});
1041 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 62});
1042 /******************* Stage 2: stride-1 units *****************/
1043 /* N H W KH KW PH PW S D G GCin GCout */
1044 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 17});
1045 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 68, 1, 1});
1046 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 68});
1047 /******************* Stage 3: stride-2 unit ******************/
1048 /* N H W KH KW PH PW S D G GCin GCout */
1049 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 34});
1050 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 136, 1, 1});
1051 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 68});
1052 /******************* Stage 3: stride-1 units *****************/
1053 /* N H W KH KW PH PW S D G GCin GCout */
1054 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 34});
1055 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 136, 1, 1});
1056 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 136});
1057 /******************* Stage 4: stride-2 unit ******************/
1058 /* N H W KH KW PH PW S D G GCin GCout */
1059 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 68});
1060 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 272, 1, 1});
1061 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 136});
1062 /******************* Stage 4: stride-1 units *****************/
1063 /* N H W KH KW PH PW S D G GCin GCout */
1064 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 272, 68});
1065 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 272, 1, 1});
1066 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 272});
1067}
1068
1069// ShuffleNet v1 with 8 groups.
1070static void ShuffleNetV1G8(benchmark::internal::Benchmark* b) {
1071 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1072
1073 /*************************** Conv 1 **************************/
1074 /* N H W KH KW PH PW S D G GCin GCout */
1075 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1076 /******************* Stage 2: stride-2 unit ******************/
1077 /* N H W KH KW PH PW S D G GCin GCout */
1078 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 96});
1079 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1080 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 45});
1081 /******************* Stage 2: stride-1 units *****************/
1082 /* N H W KH KW PH PW S D G GCin GCout */
1083 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 12});
1084 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1085 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 48});
1086 /******************* Stage 3: stride-2 unit ******************/
1087 /* N H W KH KW PH PW S D G GCin GCout */
1088 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 24});
1089 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1090 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 48});
1091 /******************* Stage 3: stride-1 units *****************/
1092 /* N H W KH KW PH PW S D G GCin GCout */
1093 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 24});
1094 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1095 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 96});
1096 /******************* Stage 4: stride-2 unit ******************/
1097 /* N H W KH KW PH PW S D G GCin GCout */
1098 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 48});
1099 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 384, 1, 1});
1100 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 96});
1101 /******************* Stage 4: stride-1 units *****************/
1102 /* N H W KH KW PH PW S D G GCin GCout */
1103 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 192, 48});
1104 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 384, 1, 1});
1105 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 192});
1106}
1107
1108// ShuffleNet v2 (0.5X scale)
1109static void ShuffleNetV2X05(benchmark::internal::Benchmark* b) {
1110 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1111
1112 /*************************** Conv 1 **************************/
1113 /* N H W KH KW PH PW S D G GCin GCout */
1114 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1115 /************************** Stage 2 **************************/
1116 /* N H W KH KW PH PW S D G GCin GCout */
1117 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1118 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1119 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 24});
1120 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 24, 1, 1});
1121 /************************** Stage 3 **************************/
1122 /* N H W KH KW PH PW S D G GCin GCout */
1123 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 48, 1, 1});
1124 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1125 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 48, 48});
1126 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 48, 1, 1});
1127 /************************** Stage 4 **************************/
1128 /* N H W KH KW PH PW S D G GCin GCout */
1129 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1130 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1131 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 96});
1132 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 96, 1, 1});
1133 /*************************** Conv 5 **************************/
1134 /* N H W KH KW PH PW S D G GCin GCout */
1135 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 192, 1024});
1136}
1137
1138// ShuffleNet v2 (1.0X scale)
1139static void ShuffleNetV2X10(benchmark::internal::Benchmark* b) {
1140 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1141
1142 /*************************** Conv 1 **************************/
1143 /* N H W KH KW PH PW S D G GCin GCout */
1144 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1145 /************************** Stage 2 **************************/
1146 /* N H W KH KW PH PW S D G GCin GCout */
1147 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1148 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1149 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 58});
1150 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 58, 1, 1});
1151 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 58, 58});
1152 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 58, 1, 1});
1153 /************************** Stage 3 **************************/
1154 /* N H W KH KW PH PW S D G GCin GCout */
1155 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 116, 1, 1});
1156 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1157 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 116, 116});
1158 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 116, 1, 1});
1159 /************************** Stage 4 **************************/
1160 /* N H W KH KW PH PW S D G GCin GCout */
1161 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 232, 1, 1});
1162 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1163 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 232, 232});
1164 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 232, 1, 1});
1165 /*************************** Conv 5 **************************/
1166 /* N H W KH KW PH PW S D G GCin GCout */
1167 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 464, 1024});
1168}
1169
1170// ShuffleNet v2 (1.5X scale)
1171static void ShuffleNetV2X15(benchmark::internal::Benchmark* b) {
1172 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1173
1174 /*************************** Conv 1 **************************/
1175 /* N H W KH KW PH PW S D G GCin GCout */
1176 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1177 /************************** Stage 2 **************************/
1178 /* N H W KH KW PH PW S D G GCin GCout */
1179 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1180 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1181 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 88});
1182 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 88, 1, 1});
1183 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 88});
1184 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
1185 /************************** Stage 3 **************************/
1186 /* N H W KH KW PH PW S D G GCin GCout */
1187 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 176, 1, 1});
1188 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1189 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 176, 176});
1190 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 176, 1, 1});
1191 /************************** Stage 4 **************************/
1192 /* N H W KH KW PH PW S D G GCin GCout */
1193 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 352, 1, 1});
1194 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1195 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1196 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 352, 1, 1});
1197 /*************************** Conv 5 **************************/
1198 /* N H W KH KW PH PW S D G GCin GCout */
1199 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 704, 1024});
1200}
1201
1202// ShuffleNet v2 (2.0X scale)
1203static void ShuffleNetV2X20(benchmark::internal::Benchmark* b) {
1204 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1205
1206 /*************************** Conv 1 **************************/
1207 /* N H W KH KW PH PW S D G GCin GCout */
1208 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1209 /************************** Stage 2 **************************/
1210 /* N H W KH KW PH PW S D G GCin GCout */
1211 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1212 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1213 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1214 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 122, 1, 1});
1215 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 122, 122});
1216 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 122, 1, 1});
1217 /************************** Stage 3 **************************/
1218 /* N H W KH KW PH PW S D G GCin GCout */
1219 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 244, 1, 1});
1220 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1221 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1222 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 244, 1, 1});
1223 /************************** Stage 4 **************************/
1224 /* N H W KH KW PH PW S D G GCin GCout */
1225 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 488, 1, 1});
1226 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1227 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1228 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 488, 1, 1});
1229 /*************************** Conv 5 **************************/
1230 /* N H W KH KW PH PW S D G GCin GCout */
1231 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 976, 2048});
1232}
1233
1234static void MobileNetV1(benchmark::internal::Benchmark* b) {
1235 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1236
1237 /* N H W KH KW PH PW S D G GCin GCout */
1238 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1239 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1240 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 64});
1241 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
1242 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 128});
1243 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 128, 1, 1});
1244 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 128, 128});
1245 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 128, 1, 1});
1246 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 256});
1247 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 256, 1, 1});
1248 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1249 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 256, 1, 1});
1250 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 512});
1251 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 512, 1, 1});
1252 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1253 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 512, 1, 1});
1254 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 1024});
1255 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1024, 1, 1});
1256 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 1024, 1024});
1257}
1258
1259static void MobileNetV2(benchmark::internal::Benchmark* b) {
1260 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1261
1262 /* N H W KH KW PH PW S D G GCin GCout */
1263 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1264
1265 /************************ Bottleneck 1 ***********************/
1266 /* N H W KH KW PH PW S D G GCin GCout */
1267 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1268 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 16});
1269
1270 /************************ Bottleneck 2 ***********************/
1271 /* N H W KH KW PH PW S D G GCin GCout */
1272 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 96});
1273 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1274 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 96, 24});
1275 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1276 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 144, 1, 1});
1277 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 144, 24});
1278
1279 /************************ Bottleneck 3 ***********************/
1280 /* N H W KH KW PH PW S D G GCin GCout */
1281//b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1282 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1283 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 32});
1284 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1285 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1286 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1287//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1288//b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1289//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1290
1291 /************************ Bottleneck 4 ***********************/
1292 /* N H W KH KW PH PW S D G GCin GCout */
1293//b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1294 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1295 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 192, 64});
1296 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1297 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1298 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1299//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1300//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1301//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1302//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1303//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1304//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1305
1306 /************************ Bottleneck 5 ***********************/
1307 /* N H W KH KW PH PW S D G GCin GCout */
1308//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1309//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1310 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 96});
1311 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1312 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1313 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1314//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1315//b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1316//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1317
1318 /************************ Bottleneck 6 ***********************/
1319 /* N H W KH KW PH PW S D G GCin GCout */
1320//b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1321 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 576, 1, 1});
1322 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 160});
1323 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1324 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1325 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1326//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1327//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1328//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1329
1330 /************************ Bottleneck 7 ***********************/
1331 /* N H W KH KW PH PW S D G GCin GCout */
1332//b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1333//b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1334 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 320});
1335
1336 /******************** Pre-pooling Conv2D *********************/
1337 /* N H W KH KW PH PW S D G GCin GCout */
1338 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 320, 1280});
1339 /******************** Post-pooling Conv2D ********************/
1340 /* N H W KH KW PH PW S D G GCin GCout */
1341 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1000});
1342}
1343
// Registers the shape of every Conv2D layer in MobileNet v3 Small (224x224
// input) with this benchmark.
// Columns: batch N, input height H and width W, kernel KH x KW, total
// (sum of both sides) padding PH x PW, stride S, dilation D, groups G, and
// per-group input/output channels GCin -> GCout.  Rows with G > 1 and
// GCin = GCout = 1 are depthwise convolutions; 1x1 rows at H = W = 1 appear
// to be the squeeze-and-excitation layers applied after global pooling
// (NOTE(review): inferred from the MobileNet v3 architecture -- confirm).
// Commented-out rows repeat an identical earlier row and are disabled to
// avoid benchmarking the same shape twice.
static void MobileNetV3Small(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1, 224, 224,  3,  3,  2,  2, 2, 1,   1,    3,   16});
  /*********************** Bottleneck 1 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1, 112, 112,  3,  3,  2,  2, 2, 1,  16,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   16,    8});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,    8,   16});
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1,   1,   16,   16});
  /*********************** Bottleneck 2 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1,   1,   16,   72});
  b->Args({1,  56,  56,  3,  3,  2,  2, 2, 1,  72,    1,    1});
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   72,   24});
  /*********************** Bottleneck 3 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   24,   88});
  b->Args({1,  28,  28,  3,  3,  2,  2, 1, 1,  88,    1,    1});
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   88,   24});
  /*********************** Bottleneck 4 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   24,   96});
  b->Args({1,  28,  28,  5,  5,  4,  4, 2, 1,  96,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   96,   24});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   24,   96});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   96,   40});
  /*********************** Bottleneck 5 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   40,  240});
  b->Args({1,  14,  14,  5,  5,  4,  4, 1, 1, 240,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  240,   64});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   64,  240});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  240,   40});
  /*********************** Bottleneck 6 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   40,  240});
//b->Args({1,  14,  14,  5,  5,  4,  4, 1, 1, 240,    1,    1});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  240,   64});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   64,  240});
//b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  240,   40});
  /*********************** Bottleneck 7 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   40,  120});
  b->Args({1,  14,  14,  5,  5,  4,  4, 1, 1, 120,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  120,   32});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   32,  120});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  120,   48});
  /*********************** Bottleneck 8 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   48,  144});
  b->Args({1,  14,  14,  5,  5,  4,  4, 1, 1, 144,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  144,   40});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   40,  144});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  144,   48});
  /*********************** Bottleneck 9 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   48,  288});
  b->Args({1,  14,  14,  5,  5,  4,  4, 2, 1, 288,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  288,   72});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   72,  288});
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  288,   96});
  /*********************** Bottleneck 10 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,   96,  576});
  b->Args({1,   7,   7,  5,  5,  4,  4, 1, 1, 576,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  576,  144});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  144,  576});
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  576,   96});
  /*********************** Bottleneck 11 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,   96,  576});
//b->Args({1,   7,   7,  5,  5,  4,  4, 1, 1, 576,    1,    1});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  576,  144});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  144,  576});
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  576,   96});
  /************************ Last Stage ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,   96,  576});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  576, 1024});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1, 1024, 1001});
}
1428
// Registers the shape of every Conv2D layer in MobileNet v3 Large (224x224
// input) with this benchmark.
// Columns: batch N, input height H and width W, kernel KH x KW, total
// (sum of both sides) padding PH x PW, stride S, dilation D, groups G, and
// per-group input/output channels GCin -> GCout.  Rows with G > 1 and
// GCin = GCout = 1 are depthwise convolutions; 1x1 rows at H = W = 1 appear
// to be the squeeze-and-excitation layers applied after global pooling
// (NOTE(review): inferred from the MobileNet v3 architecture -- confirm).
// Commented-out rows repeat an identical earlier row and are disabled to
// avoid benchmarking the same shape twice.
static void MobileNetV3Large(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1, 224, 224,  3,  3,  2,  2, 2, 1,   1,    3,   16});
  /*********************** Bottleneck 1 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1, 112, 112,  3,  3,  2,  2, 1, 1,  16,    1,    1});
  b->Args({1, 112, 112,  1,  1,  0,  0, 1, 1,   1,   16,   16});
  /*********************** Bottleneck 2 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1, 112, 112,  1,  1,  0,  0, 1, 1,   1,   16,   64});
  b->Args({1, 112, 112,  3,  3,  2,  2, 2, 1,  64,    1,    1});
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1,   1,   64,   24});
  /*********************** Bottleneck 3 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1,   1,   24,   72});
  b->Args({1,  56,  56,  3,  3,  2,  2, 1, 1,  72,    1,    1});
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1,   1,   72,   24});
  /*********************** Bottleneck 4 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1,   1,   24,   72});
  b->Args({1,  56,  56,  5,  5,  4,  4, 2, 1,  72,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   72,   24});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   24,   72});
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   72,   40});
  /*********************** Bottleneck 5 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   40,  120});
  b->Args({1,  28,  28,  5,  5,  4,  4, 1, 1, 120,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  120,   32});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   32,  120});
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,  120,   40});
  /*********************** Bottleneck 6 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   40,  120});
//b->Args({1,  28,  28,  5,  5,  4,  4, 1, 1, 120,    1,    1});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  120,   32});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,   32,  120});
//b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,  120,   40});
  /*********************** Bottleneck 7 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1,   1,   40,  240});
  b->Args({1,  28,  28,  3,  3,  2,  2, 2, 1, 240,    1,    1});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  240,   80});
  /*********************** Bottleneck 8 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   80,  200});
  b->Args({1,  14,  14,  3,  3,  2,  2, 1, 1, 200,    1,    1});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  200,   80});
  /*********************** Bottleneck 9 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   80,  184});
  b->Args({1,  14,  14,  3,  3,  2,  2, 1, 1, 184,    1,    1});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  184,   80});
  /********************** Bottleneck 10 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   80,  184});
//b->Args({1,  14,  14,  3,  3,  2,  2, 1, 1, 184,    1,    1});
//b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  184,   80});
  /********************** Bottleneck 11 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,   80,  480});
  b->Args({1,  14,  14,  3,  3,  2,  2, 1, 1, 480,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  480,  120});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  120,  480});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  480,  112});
  /********************** Bottleneck 12 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  112,  672});
  b->Args({1,  14,  14,  3,  3,  2,  2, 1, 1, 672,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  672,  168});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  168,  672});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  672,  112});
  /********************** Bottleneck 13 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1,   1,  112,  672});
  b->Args({1,  14,  14,  5,  5,  4,  4, 2, 1, 672,    1,    1});
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  672,  160});
  /********************** Bottleneck 14 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  160,  960});
  b->Args({1,   7,   7,  5,  5,  4,  4, 1, 1, 960,    1,    1});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  960,  240});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  240,  960});
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  960,  160});
  /********************** Bottleneck 15 ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  160,  960});
//b->Args({1,   7,   7,  5,  5,  4,  4, 1, 1, 960,    1,    1});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  960,  240});
//b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  240,  960});
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  960,  160});
  /************************ Last Stage ***********************/
  /*       N   H    W   KH  KW  PH  PW  S  D    G  GCin  GCout */
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1,   1,  160,  960});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1,  960, 1280});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1,   1, 1280, 1001});
}
1529
1530// SqueezeNet 1.0
1531static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
1532 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1533
1534 /************************** Conv 1 *************************/
1535 /* N H W KH KW PH PW S D G GCin GCout */
1536 b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 96});
1537 /************************** Fire 2 *************************/
1538 /* N H W KH KW PH PW S D G GCin GCout */
1539 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 96, 16});
1540 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1541 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1542 /************************** Fire 3 *************************/
1543 /* N H W KH KW PH PW S D G GCin GCout */
1544 b->Args({1, 56, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
1545//b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
1546//b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
1547 /************************** Fire 4 *************************/
1548 /* N H W KH KW PH PW S D G GCin GCout */
1549 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 32});
1550 b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1551 b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1552 /************************** Fire 5 *************************/
1553 /* N H W KH KW PH PW S D G GCin GCout */
1554 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
1555 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
1556 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
1557 /************************** Fire 6 *************************/
1558 /* N H W KH KW PH PW S D G GCin GCout */
1559 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 48});
1560 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1561 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1562 /************************** Fire 7 *************************/
1563 /* N H W KH KW PH PW S D G GCin GCout */
1564 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 48});
1565//b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
1566//b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
1567 /************************** Fire 8 *************************/
1568 /* N H W KH KW PH PW S D G GCin GCout */
1569 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1570 b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1571 b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1572 /************************** Fire 9 *************************/
1573 /* N H W KH KW PH PW S D G GCin GCout */
1574 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
1575 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
1576 b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
1577 /************************* Conv 10 *************************/
1578 /* N H W KH KW PH PW S D G GCin GCout */
1579 b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
1580}
1581
// SqueezeNet 1.1
// Registers the shape of every Conv2D layer in SqueezeNet 1.1 (224x224
// input) with this benchmark.
// Columns: batch N, input height H and width W, kernel KH x KW, total
// (sum of both sides) padding PH x PW, stride S, dilation D, groups G, and
// per-group input/output channels GCin -> GCout.
// Commented-out rows repeat an identical earlier row and are disabled to
// avoid benchmarking the same shape twice.
static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************** Conv 1 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1, 224, 224,  3,  3,  2,  2, 2, 1, 1,    3,   64});
  /************************** Fire 2 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  55,  55,  1,  1,  0,  0, 1, 1, 1,   64,   16});
  b->Args({1,  55,  55,  1,  1,  0,  0, 1, 1, 1,   16,   64});
  b->Args({1,  55,  55,  3,  3,  2,  2, 1, 1, 1,   16,   64});
  /************************** Fire 3 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  55,  55,  1,  1,  0,  0, 1, 1, 1,  128,   16});
//b->Args({1,  55,  55,  1,  1,  0,  0, 1, 1, 1,   16,   64});
//b->Args({1,  55,  55,  3,  3,  2,  2, 1, 1, 1,   16,   64});
  /************************** Fire 4 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  27,  27,  1,  1,  0,  0, 1, 1, 1,  128,   32});
  b->Args({1,  27,  27,  1,  1,  0,  0, 1, 1, 1,   32,  128});
  b->Args({1,  27,  27,  3,  3,  2,  2, 1, 1, 1,   32,  128});
  /************************** Fire 5 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  27,  27,  1,  1,  0,  0, 1, 1, 1,  256,   32});
//b->Args({1,  27,  27,  1,  1,  0,  0, 1, 1, 1,   32,  128});
//b->Args({1,  27,  27,  3,  3,  2,  2, 1, 1, 1,   32,  128});
  /************************** Fire 6 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,  256,   48});
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,   48,  192});
  b->Args({1,  13,  13,  3,  3,  2,  2, 1, 1, 1,   48,  192});
  /************************** Fire 7 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,  384,   48});
//b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,   48,  192});
//b->Args({1,  13,  13,  3,  3,  2,  2, 1, 1, 1,   48,  192});
  /************************** Fire 8 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,  384,   64});
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,   64,  256});
  b->Args({1,  13,  13,  3,  3,  2,  2, 1, 1, 1,   64,  256});
  /************************** Fire 9 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,  512,   64});
//b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,   64,  256});
//b->Args({1,  13,  13,  3,  3,  2,  2, 1, 1, 1,   64,  256});
  /************************* Conv 10 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  13,  13,  1,  1,  0,  0, 1, 1, 1,  512, 1000});
}
1633
// Registers the shape of every distinct Conv2D layer in Inception v3
// (299x299 input) with this benchmark.
// Columns: batch N, input height H and width W, kernel KH x KW, total
// (sum of both sides) padding PH x PW, stride S, dilation D, groups G, and
// per-group input/output channels GCin -> GCout.  The 1x7/7x1 and 1x3/3x1
// rows are the factorized asymmetric convolutions of the Inception blocks;
// PH/PW of 0 means VALID (unpadded) convolution.
static void InceptionV3(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1, 299, 299,  3,  3,  0,  0, 2, 1, 1,    3,   32});
  b->Args({1, 149, 149,  3,  3,  0,  0, 1, 1, 1,   32,   32});
  b->Args({1, 147, 147,  3,  3,  2,  2, 1, 1, 1,   32,   64});
  b->Args({1,  73,  73,  1,  1,  0,  0, 1, 1, 1,   64,   80});
  b->Args({1,  73,  73,  3,  3,  0,  0, 1, 1, 1,   80,  192});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  192,   64});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  192,   48});
  b->Args({1,  35,  35,  5,  5,  4,  4, 1, 1, 1,   48,   64});
  b->Args({1,  35,  35,  3,  3,  2,  2, 1, 1, 1,   64,   96});
  b->Args({1,  35,  35,  3,  3,  2,  2, 1, 1, 1,   96,   96});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  192,   32});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  256,   64});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  256,   48});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  288,   64});
  b->Args({1,  35,  35,  1,  1,  0,  0, 1, 1, 1,  288,   48});
  b->Args({1,  35,  35,  3,  3,  0,  0, 2, 1, 1,  288,  384});
  b->Args({1,  35,  35,  3,  3,  0,  0, 2, 1, 1,   96,   96});
  b->Args({1,  17,  17,  1,  1,  0,  0, 1, 1, 1,  768,  192});
  b->Args({1,  17,  17,  1,  1,  0,  0, 1, 1, 1,  768,  128});
  b->Args({1,  17,  17,  1,  7,  0,  6, 1, 1, 1,  128,  128});
  b->Args({1,  17,  17,  7,  1,  6,  0, 1, 1, 1,  128,  192});
  b->Args({1,  17,  17,  7,  1,  6,  0, 1, 1, 1,  128,  128});
  b->Args({1,  17,  17,  1,  7,  0,  6, 1, 1, 1,  128,  192});
  b->Args({1,  17,  17,  1,  1,  0,  0, 1, 1, 1,  768,  160});
  b->Args({1,  17,  17,  1,  7,  0,  6, 1, 1, 1,  160,  160});
  b->Args({1,  17,  17,  7,  1,  6,  0, 1, 1, 1,  160,  192});
  b->Args({1,  17,  17,  7,  1,  6,  0, 1, 1, 1,  160,  160});
  b->Args({1,  17,  17,  1,  7,  0,  6, 1, 1, 1,  160,  192});
  b->Args({1,  17,  17,  1,  7,  0,  6, 1, 1, 1,  192,  192});
  b->Args({1,  17,  17,  7,  1,  6,  0, 1, 1, 1,  192,  192});
  b->Args({1,  17,  17,  3,  3,  0,  0, 2, 1, 1,  192,  320});
  b->Args({1,  17,  17,  3,  3,  0,  0, 2, 1, 1,  192,  192});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 1280,  320});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 1280,  384});
  b->Args({1,   8,   8,  1,  3,  0,  2, 1, 1, 1,  384,  384});
  b->Args({1,   8,   8,  3,  1,  2,  0, 1, 1, 1,  384,  384});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 1280,  448});
  b->Args({1,   8,   8,  3,  3,  2,  2, 1, 1, 1,  448,  384});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 1280,  192});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 2048,  320});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 2048,  384});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 2048,  448});
  b->Args({1,   8,   8,  1,  1,  0,  0, 1, 1, 1, 2048,  192});
  b->Args({1,   1,   1,  1,  1,  0,  0, 1, 1, 1, 2048, 1001});
}
1683
1684static void ResNet18(benchmark::internal::Benchmark* b) {
1685 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1686
1687 /************************* Conv 1 *************************/
1688 /* N H W KH KW PH PW S D G GCin GCout */
1689 b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
1690 /************************ Conv 2.X ************************/
1691 /* N H W KH KW PH PW S D G GCin GCout */
1692 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
1693 /************************ Conv 3.X ************************/
1694 /* N H W KH KW PH PW S D G GCin GCout */
1695 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 64, 128});
1696 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
1697 b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 64, 128});
1698 /************************ Conv 4.X ************************/
1699 /* N H W KH KW PH PW S D G GCin GCout */
1700 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 128, 256});
1701 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
1702 b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 128, 256});
1703 /************************ Conv 5.X ************************/
1704 /* N H W KH KW PH PW S D G GCin GCout */
1705 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 256, 512});
1706 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1707 b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 256, 512});
1708}
1709
// Registers the shape of every Conv2D layer in ResNet-50 (224x224 input)
// with this benchmark.
// Columns: batch N, input height H and width W, kernel KH x KW, total
// (sum of both sides) padding PH x PW, stride S, dilation D, groups G, and
// per-group input/output channels GCin -> GCout.  Each stage lists the
// 1x1-reduce / 3x3 / 1x1-expand bottleneck triple plus the stride-2 1x1
// projection shortcut; commented-out rows repeat an identical earlier row
// and are disabled to avoid benchmarking the same shape twice.
static void ResNet50(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1, 224, 224,  7,  7,  6,  6, 2, 1, 1,    3,   64});
  /************************ Conv 2.1 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1, 1,   64,   64});
  b->Args({1,  56,  56,  3,  3,  2,  2, 1, 1, 1,   64,   64});
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1, 1,   64,  256});
//b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1, 1,   64,  256});
  /************************ Conv 2.X ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1, 1,  256,   64});
//b->Args({1,  56,  56,  3,  3,  2,  2, 1, 1, 1,   64,   64});
//b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1, 1,   64,  256});
  /************************ Conv 3.1 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  56,  56,  1,  1,  0,  0, 1, 1, 1,  256,  128});
  b->Args({1,  56,  56,  3,  3,  2,  2, 2, 1, 1,  128,  128});
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1, 1,  128,  512});
  b->Args({1,  56,  56,  1,  1,  0,  0, 2, 1, 1,  256,  512});
  /************************ Conv 3.X ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1, 1,  512,  128});
  b->Args({1,  28,  28,  3,  3,  2,  2, 1, 1, 1,  128,  128});
//b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1, 1,  128,  512});
  /************************ Conv 4.1 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  28,  28,  1,  1,  0,  0, 1, 1, 1,  512,  256});
  b->Args({1,  28,  28,  3,  3,  2,  2, 2, 1, 1,  256,  256});
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1, 1,  256, 1024});
  b->Args({1,  28,  28,  1,  1,  0,  0, 2, 1, 1,  512, 1024});
  /************************ Conv 4.X ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1, 1, 1024,  256});
  b->Args({1,  14,  14,  3,  3,  2,  2, 1, 1, 1,  256,  256});
//b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1, 1,  256, 1024});
  /************************ Conv 5.1 ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,  14,  14,  1,  1,  0,  0, 1, 1, 1, 1024,  512});
  b->Args({1,  14,  14,  3,  3,  2,  2, 2, 1, 1,  512,  512});
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1, 1,  512, 2048});
  b->Args({1,  14,  14,  1,  1,  0,  0, 2, 1, 1, 1024, 2048});
  /************************ Conv 5.X ************************/
  /*       N   H    W   KH  KW  PH  PW  S  D  G  GCin  GCout */
  b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1, 1, 2048,  512});
  b->Args({1,   7,   7,  3,  3,  2,  2, 1, 1, 1,  512,  512});
//b->Args({1,   7,   7,  1,  1,  0,  0, 1, 1, 1,  512, 2048});
}
1761
1762static void VGG(benchmark::internal::Benchmark* b) {
1763 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1764
1765 /************************* Conv 1.1 ************************/
1766 /* N H W KH KW PH PW S D G GCin GCout */
1767 b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 3, 64});
1768 /************************* Conv 1.2 ************************/
1769 /* N H W KH KW PH PW S D G GCin GCout */
1770 b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 64, 64});
1771
1772 /************************* Conv 2.1 ************************/
1773 /* N H W KH KW PH PW S D G GCin GCout */
1774 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 64, 128});
1775 /************************* Conv 2.2 ************************/
1776 /* N H W KH KW PH PW S D G GCin GCout */
1777 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 128, 128});
1778
1779 /************************* Conv 3.1 ************************/
1780 /* N H W KH KW PH PW S D G GCin GCout */
1781 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 128, 256});
1782 /************************* Conv 3.2 ************************/
1783 /* N H W KH KW PH PW S D G GCin GCout */
1784 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 256, 256});
1785 /************************* Conv 3.3 ************************/
1786 /* N H W KH KW PH PW S D G GCin GCout */
1787 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1788
1789 /************************* Conv 4.1 ************************/
1790 /* N H W KH KW PH PW S D G GCin GCout */
1791 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 256, 512});
1792 /************************* Conv 4.2 ************************/
1793 /* N H W KH KW PH PW S D G GCin GCout */
1794 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1795 /************************* Conv 4.3 ************************/
1796 /* N H W KH KW PH PW S D G GCin GCout */
1797 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1798
1799 /************************* Conv 5.X ************************/
1800 /* N H W KH KW PH PW S D G GCin GCout */
1801 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 512, 512});
1802 /************************* Conv 5.3 ************************/
1803 /* N H W KH KW PH PW S D G GCin GCout */
1804 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1805}
1806
1807// SRCNN (9-1-5)
1808static void SRCNN915(benchmark::internal::Benchmark* b) {
1809 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1810
1811 /* N H W KH KW PH PW S D G GCin GCout */
1812 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1813 b->Args({1, 376, 376, 1, 1, 0, 0, 1, 1, 1, 64, 32});
1814 b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1815}
1816
1817// SRCNN (9-3-5)
1818static void SRCNN935(benchmark::internal::Benchmark* b) {
1819 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1820
1821 /* N H W KH KW PH PW S D G GCin GCout */
1822 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1823 b->Args({1, 376, 376, 3, 3, 0, 0, 1, 1, 1, 64, 32});
1824 b->Args({1, 374, 374, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1825}
1826
1827// SRCNN (9-5-5)
1828static void SRCNN955(benchmark::internal::Benchmark* b) {
1829 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1830
1831 /* N H W KH KW PH PW S D G GCin GCout */
1832 b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
1833 b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 64, 32});
1834 b->Args({1, 372, 372, 5, 5, 0, 0, 1, 1, 1, 32, 1});
1835}
1836
Chao Meic6640272020-07-23 09:35:11 -07001837#ifndef XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001838BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
1839BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
1840BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
1841BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
1842BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
1843BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
1844BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
1845BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
1846BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
1847BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
1848BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
1849BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
1850BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
1851BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
1852BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
1853BENCHMARK_CAPTURE(xnnpack_convolution_f16, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
1854BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
1855BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
1856BENCHMARK_CAPTURE(xnnpack_convolution_f16, vgg, "VGG")->Apply(VGG)->UseRealTime();
1857BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
1858BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
1859BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
Chao Meic6640272020-07-23 09:35:11 -07001860#endif // XNN_NO_F16_OPERATORS
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001861
XNNPACK Teamb455b122019-09-27 18:10:33 -07001862BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
1863BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
1864BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
1865BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
1866BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
1867BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
1868BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
1869BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
1870BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
1871BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
1872BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
1873BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
1874BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
1875BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
1876BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
1877BENCHMARK_CAPTURE(xnnpack_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
1878BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
1879BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
1880BENCHMARK_CAPTURE(xnnpack_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
1881BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
1882BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
1883BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
1884
Chao Meic6640272020-07-23 09:35:11 -07001885#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan08b7a972020-07-14 18:17:29 -07001886BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
1887BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
1888BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
1889BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
1890BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
1891BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
1892BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
1893BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
1894BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
1895BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
1896BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
1897BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
1898BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
1899BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
1900BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
1901BENCHMARK_CAPTURE(xnnpack_convolution_qu8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
1902BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
1903BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
1904BENCHMARK_CAPTURE(xnnpack_convolution_qu8, vgg, "VGG")->Apply(VGG)->UseRealTime();
1905BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
1906BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
1907BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
Chao Meic6640272020-07-23 09:35:11 -07001908#endif // XNN_NO_QU8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001909
// Baseline comparison: the same models benchmarked through TensorFlow Lite's
// fp32 convolution path. Only built when the TFLite dependency is available.
#ifdef BENCHMARK_TENSORFLOW_LITE
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
  BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE

// Baseline comparison: fp32 convolution via the ARM Compute Library backend.
// Note: this section registers fewer models than the XNNPACK/TFLite sections
// (no MobileNet v3 variants) — presumably unsupported configurations; verify
// against the armcl_convolution_f32 harness before adding them.
#ifdef BENCHMARK_ARM_COMPUTE_LIBRARY
  BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
  BENCHMARK_CAPTURE(armcl_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif  // BENCHMARK_ARM_COMPUTE_LIBRARY

1958#ifndef XNNPACK_BENCHMARK_NO_MAIN
1959BENCHMARK_MAIN();
1960#endif