blob: b45df5965a8bee18c337125a1a5522c8fdbcb663 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cmath>
11#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070012#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013#include <random>
14#include <vector>
15
16#include <xnnpack.h>
17
18#include <benchmark/benchmark.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070019#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070020
21
22static void channel_shuffle_x8(benchmark::State& state, const char* net) {
23 const size_t batch_size = static_cast<size_t>(state.range(0));
24 const size_t groups = static_cast<size_t>(state.range(1));
25 const size_t group_channels = static_cast<size_t>(state.range(2));
26
27 std::random_device random_device;
28 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070029 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070030
31 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + batch_size * groups * group_channels);
32 std::vector<uint8_t> output(batch_size * groups * group_channels);
33 std::generate(input.begin(), input.end(), std::ref(u8rng));
34
Marat Dukhan04f03be2019-11-19 12:36:47 -080035 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070036 if (status != xnn_status_success) {
37 state.SkipWithError("failed to initialize XNNPACK");
38 return;
39 }
40
41 xnn_operator_t channel_shuffle_op = nullptr;
42 status = xnn_create_channel_shuffle_nc_x8(
43 groups, group_channels,
44 groups * group_channels /* input stride */,
45 groups * group_channels /* output stride */,
46 0 /* flags */, &channel_shuffle_op);
47 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
48 state.SkipWithError("failed to create X8 Channel Shuffle operator");
49 return;
50 }
51
52 status = xnn_setup_channel_shuffle_nc_x8(
53 channel_shuffle_op,
54 batch_size,
55 input.data(), output.data(),
56 nullptr /* thread pool */);
57 if (status != xnn_status_success) {
58 state.SkipWithError("failed to setup X8 Channel Shuffle operator");
59 return;
60 }
61
62 for (auto _ : state) {
63 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
64 if (status != xnn_status_success) {
65 state.SkipWithError("failed to run X8 Channel Shuffle operator");
66 return;
67 }
68 }
69
70 status = xnn_delete_operator(channel_shuffle_op);
71 if (status != xnn_status_success) {
72 state.SkipWithError("failed to delete X8 Channel Shuffle operator");
73 return;
74 }
75
Marat Dukhand713e8a2020-12-04 14:23:12 -080076 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
77 if (cpu_frequency != 0) {
78 state.counters["cpufreq"] = cpu_frequency;
79 }
Frank Barchardbb4c18b2019-09-30 11:05:52 -070080
XNNPACK Teamb455b122019-09-27 18:10:33 -070081 const size_t elements_per_iteration = batch_size * groups * group_channels;
82 state.counters["elements"] =
83 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
84
85 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
86 state.counters["bytes"] =
87 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
88}
89
90static void channel_shuffle_x32(benchmark::State& state, const char* net) {
91 const size_t batch_size = static_cast<size_t>(state.range(0));
92 const size_t groups = static_cast<size_t>(state.range(1));
93 const size_t group_channels = static_cast<size_t>(state.range(2));
94
95 std::random_device random_device;
96 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070097 auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070098
99 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + batch_size * groups * group_channels);
100 std::vector<float> output(batch_size * groups * group_channels);
101 std::generate(input.begin(), input.end(), std::ref(f32rng));
102
Marat Dukhan04f03be2019-11-19 12:36:47 -0800103 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700104 if (status != xnn_status_success) {
105 state.SkipWithError("failed to initialize XNNPACK");
106 return;
107 }
108
109 xnn_operator_t channel_shuffle_op = nullptr;
110 status = xnn_create_channel_shuffle_nc_x32(
111 groups, group_channels,
112 groups * group_channels /* input stride */,
113 groups * group_channels /* output stride */,
114 0 /* flags */, &channel_shuffle_op);
115 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
116 state.SkipWithError("failed to create X32 Channel Shuffle operator");
117 return;
118 }
119
120 status = xnn_setup_channel_shuffle_nc_x32(
121 channel_shuffle_op,
122 batch_size,
123 input.data(), output.data(),
124 nullptr /* thread pool */);
125 if (status != xnn_status_success) {
126 state.SkipWithError("failed to setup X32 Channel Shuffle operator");
127 return;
128 }
129
130 for (auto _ : state) {
131 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
132 if (status != xnn_status_success) {
133 state.SkipWithError("failed to run X32 Channel Shuffle operator");
134 return;
135 }
136 }
137
138 status = xnn_delete_operator(channel_shuffle_op);
139 if (status != xnn_status_success) {
140 state.SkipWithError("failed to delete X32 Channel Shuffle operator");
141 return;
142 }
143
Marat Dukhand713e8a2020-12-04 14:23:12 -0800144 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
145 if (cpu_frequency != 0) {
146 state.counters["cpufreq"] = cpu_frequency;
147 }
Frank Barchardbb4c18b2019-09-30 11:05:52 -0700148
XNNPACK Teamb455b122019-09-27 18:10:33 -0700149 const size_t elements_per_iteration = batch_size * groups * group_channels;
150 state.counters["elements"] =
151 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
152
153 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
154 state.counters["bytes"] =
155 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
156}
157
158static void ShuffleNetV1G2Arguments(benchmark::internal::Benchmark* b)
159{
160 b->ArgNames({"N", "G", "GC"});
161
162 /******** Stage 2 ********/
163 /* H W G CG */
164 b->Args({56 * 56, 2, 25});
165 b->Args({28 * 28, 2, 25});
166
167 /******** Stage 3 ********/
168 /* H W G CG */
169 b->Args({28 * 28, 2, 50});
170 b->Args({14 * 14, 2, 50});
171
172 /******** Stage 4 ********/
173 /* H W G CG */
174 b->Args({14 * 14, 2, 100});
175 b->Args({ 7 * 7, 2, 100});
176}
177
178static void ShuffleNetV1G3Arguments(benchmark::internal::Benchmark* b)
179{
180 b->ArgNames({"N", "G", "GC"});
181
182 /******** Stage 2 *******/
183 /* H W G CG */
184 b->Args({56 * 56, 3, 20});
185 b->Args({28 * 28, 3, 20});
186
187 /******** Stage 3 *******/
188 /* H W G CG */
189 b->Args({28 * 28, 3, 40});
190 b->Args({14 * 14, 3, 40});
191
192 /******** Stage 4 *******/
193 /* H W G CG */
194 b->Args({14 * 14, 3, 80});
195 b->Args({ 7 * 7, 3, 80});
196}
197
198static void ShuffleNetV1G4Arguments(benchmark::internal::Benchmark* b)
199{
200 b->ArgNames({"N", "G", "GC"});
201
202 /******** Stage 2 *******/
203 /* H W G CG */
204 b->Args({56 * 56, 4, 17});
205 b->Args({28 * 28, 4, 17});
206
207 /******** Stage 3 *******/
208 /* H W G CG */
209 b->Args({28 * 28, 4, 34});
210 b->Args({14 * 14, 4, 34});
211
212 /******** Stage 4 *******/
213 /* H W G CG */
214 b->Args({14 * 14, 4, 68});
215 b->Args({ 7 * 7, 4, 68});
216}
217
218static void ShuffleNetV1G8Arguments(benchmark::internal::Benchmark* b)
219{
220 b->ArgNames({"N", "G", "GC"});
221
222 /******** Stage 2 *******/
223 /* H W G CG */
224 b->Args({56 * 56, 8, 12});
225 b->Args({28 * 28, 8, 12});
226
227 /******** Stage 3 *******/
228 /* H W G CG */
229 b->Args({28 * 28, 8, 24});
230 b->Args({14 * 14, 8, 24});
231
232 /******** Stage 4 *******/
233 /* H W G CG */
234 b->Args({14 * 14, 8, 48});
235 b->Args({ 7 * 7, 8, 48});
236}
237
238static void ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark* b)
239{
240 b->ArgNames({"N", "G", "GC"});
241
242 /******** Stage 2 *******/
243 /* H W G CG */
244 b->Args({28 * 28, 2, 24});
245
246 /******** Stage 3 *******/
247 /* H W G CG */
248 b->Args({14 * 14, 2, 48});
249
250 /******** Stage 4 *******/
251 /* H W G CG */
252 b->Args({ 7 * 7, 2, 96});
253}
254
255static void ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark* b)
256{
257 b->ArgNames({"N", "G", "GC"});
258
259 /******** Stage 2 ********/
260 /* H W G CG */
261 b->Args({28 * 28, 2, 58});
262
263 /******** Stage 3 ********/
264 /* H W G CG */
265 b->Args({14 * 14, 2, 116});
266
267 /******** Stage 4 ********/
268 /* H W G CG */
269 b->Args({ 7 * 7, 2, 232});
270}
271
272static void ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark* b)
273{
274 b->ArgNames({"N", "G", "GC"});
275
276 /******** Stage 2 ********/
277 /* H W G CG */
278 b->Args({28 * 28, 2, 88});
279
280 /******** Stage 3 ********/
281 /* H W G CG */
282 b->Args({14 * 14, 2, 176});
283
284 /******** Stage 4 ********/
285 /* H W G CG */
286 b->Args({ 7 * 7, 2, 352});
287}
288
289static void ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark* b)
290{
291 b->ArgNames({"N", "G", "GC"});
292
293 /******** Stage 2 ********/
294 /* H W G CG */
295 b->Args({28 * 28, 2, 122});
296
297 /******** Stage 3 ********/
298 /* H W G CG */
299 b->Args({14 * 14, 2, 244});
300
301 /******** Stage 4 ********/
302 /* H W G CG */
303 b->Args({ 7 * 7, 2, 488});
304}
305
306BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
307BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
308BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
309BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
310BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
311BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
312BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
313BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
314
315BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
316BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
317BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
318BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
319BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
320BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
321BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
322BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
323
324#ifndef XNNPACK_BENCHMARK_NO_MAIN
325BENCHMARK_MAIN();
326#endif