// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cmath>
11#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070012#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013#include <random>
14#include <vector>
15
16#include <xnnpack.h>
17
18#include <benchmark/benchmark.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070019#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070020
21
22static void channel_shuffle_x8(benchmark::State& state, const char* net) {
23 const size_t batch_size = static_cast<size_t>(state.range(0));
24 const size_t groups = static_cast<size_t>(state.range(1));
25 const size_t group_channels = static_cast<size_t>(state.range(2));
26
27 std::random_device random_device;
28 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070029 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070030
31 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + batch_size * groups * group_channels);
32 std::vector<uint8_t> output(batch_size * groups * group_channels);
33 std::generate(input.begin(), input.end(), std::ref(u8rng));
34
Marat Dukhan04f03be2019-11-19 12:36:47 -080035 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070036 if (status != xnn_status_success) {
37 state.SkipWithError("failed to initialize XNNPACK");
38 return;
39 }
40
41 xnn_operator_t channel_shuffle_op = nullptr;
42 status = xnn_create_channel_shuffle_nc_x8(
43 groups, group_channels,
44 groups * group_channels /* input stride */,
45 groups * group_channels /* output stride */,
46 0 /* flags */, &channel_shuffle_op);
47 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
48 state.SkipWithError("failed to create X8 Channel Shuffle operator");
49 return;
50 }
51
52 status = xnn_setup_channel_shuffle_nc_x8(
53 channel_shuffle_op,
54 batch_size,
55 input.data(), output.data(),
56 nullptr /* thread pool */);
57 if (status != xnn_status_success) {
58 state.SkipWithError("failed to setup X8 Channel Shuffle operator");
59 return;
60 }
61
62 for (auto _ : state) {
63 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
64 if (status != xnn_status_success) {
65 state.SkipWithError("failed to run X8 Channel Shuffle operator");
66 return;
67 }
68 }
69
70 status = xnn_delete_operator(channel_shuffle_op);
71 if (status != xnn_status_success) {
72 state.SkipWithError("failed to delete X8 Channel Shuffle operator");
73 return;
74 }
75
Frank Barchardbb4c18b2019-09-30 11:05:52 -070076 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
77
XNNPACK Teamb455b122019-09-27 18:10:33 -070078 const size_t elements_per_iteration = batch_size * groups * group_channels;
79 state.counters["elements"] =
80 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
81
82 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
83 state.counters["bytes"] =
84 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
85}
86
87static void channel_shuffle_x32(benchmark::State& state, const char* net) {
88 const size_t batch_size = static_cast<size_t>(state.range(0));
89 const size_t groups = static_cast<size_t>(state.range(1));
90 const size_t group_channels = static_cast<size_t>(state.range(2));
91
92 std::random_device random_device;
93 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070094 auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070095
96 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + batch_size * groups * group_channels);
97 std::vector<float> output(batch_size * groups * group_channels);
98 std::generate(input.begin(), input.end(), std::ref(f32rng));
99
Marat Dukhan04f03be2019-11-19 12:36:47 -0800100 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700101 if (status != xnn_status_success) {
102 state.SkipWithError("failed to initialize XNNPACK");
103 return;
104 }
105
106 xnn_operator_t channel_shuffle_op = nullptr;
107 status = xnn_create_channel_shuffle_nc_x32(
108 groups, group_channels,
109 groups * group_channels /* input stride */,
110 groups * group_channels /* output stride */,
111 0 /* flags */, &channel_shuffle_op);
112 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
113 state.SkipWithError("failed to create X32 Channel Shuffle operator");
114 return;
115 }
116
117 status = xnn_setup_channel_shuffle_nc_x32(
118 channel_shuffle_op,
119 batch_size,
120 input.data(), output.data(),
121 nullptr /* thread pool */);
122 if (status != xnn_status_success) {
123 state.SkipWithError("failed to setup X32 Channel Shuffle operator");
124 return;
125 }
126
127 for (auto _ : state) {
128 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
129 if (status != xnn_status_success) {
130 state.SkipWithError("failed to run X32 Channel Shuffle operator");
131 return;
132 }
133 }
134
135 status = xnn_delete_operator(channel_shuffle_op);
136 if (status != xnn_status_success) {
137 state.SkipWithError("failed to delete X32 Channel Shuffle operator");
138 return;
139 }
140
Frank Barchardbb4c18b2019-09-30 11:05:52 -0700141 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
142
XNNPACK Teamb455b122019-09-27 18:10:33 -0700143 const size_t elements_per_iteration = batch_size * groups * group_channels;
144 state.counters["elements"] =
145 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
146
147 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
148 state.counters["bytes"] =
149 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
150}
151
152static void ShuffleNetV1G2Arguments(benchmark::internal::Benchmark* b)
153{
154 b->ArgNames({"N", "G", "GC"});
155
156 /******** Stage 2 ********/
157 /* H W G CG */
158 b->Args({56 * 56, 2, 25});
159 b->Args({28 * 28, 2, 25});
160
161 /******** Stage 3 ********/
162 /* H W G CG */
163 b->Args({28 * 28, 2, 50});
164 b->Args({14 * 14, 2, 50});
165
166 /******** Stage 4 ********/
167 /* H W G CG */
168 b->Args({14 * 14, 2, 100});
169 b->Args({ 7 * 7, 2, 100});
170}
171
172static void ShuffleNetV1G3Arguments(benchmark::internal::Benchmark* b)
173{
174 b->ArgNames({"N", "G", "GC"});
175
176 /******** Stage 2 *******/
177 /* H W G CG */
178 b->Args({56 * 56, 3, 20});
179 b->Args({28 * 28, 3, 20});
180
181 /******** Stage 3 *******/
182 /* H W G CG */
183 b->Args({28 * 28, 3, 40});
184 b->Args({14 * 14, 3, 40});
185
186 /******** Stage 4 *******/
187 /* H W G CG */
188 b->Args({14 * 14, 3, 80});
189 b->Args({ 7 * 7, 3, 80});
190}
191
192static void ShuffleNetV1G4Arguments(benchmark::internal::Benchmark* b)
193{
194 b->ArgNames({"N", "G", "GC"});
195
196 /******** Stage 2 *******/
197 /* H W G CG */
198 b->Args({56 * 56, 4, 17});
199 b->Args({28 * 28, 4, 17});
200
201 /******** Stage 3 *******/
202 /* H W G CG */
203 b->Args({28 * 28, 4, 34});
204 b->Args({14 * 14, 4, 34});
205
206 /******** Stage 4 *******/
207 /* H W G CG */
208 b->Args({14 * 14, 4, 68});
209 b->Args({ 7 * 7, 4, 68});
210}
211
212static void ShuffleNetV1G8Arguments(benchmark::internal::Benchmark* b)
213{
214 b->ArgNames({"N", "G", "GC"});
215
216 /******** Stage 2 *******/
217 /* H W G CG */
218 b->Args({56 * 56, 8, 12});
219 b->Args({28 * 28, 8, 12});
220
221 /******** Stage 3 *******/
222 /* H W G CG */
223 b->Args({28 * 28, 8, 24});
224 b->Args({14 * 14, 8, 24});
225
226 /******** Stage 4 *******/
227 /* H W G CG */
228 b->Args({14 * 14, 8, 48});
229 b->Args({ 7 * 7, 8, 48});
230}
231
232static void ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark* b)
233{
234 b->ArgNames({"N", "G", "GC"});
235
236 /******** Stage 2 *******/
237 /* H W G CG */
238 b->Args({28 * 28, 2, 24});
239
240 /******** Stage 3 *******/
241 /* H W G CG */
242 b->Args({14 * 14, 2, 48});
243
244 /******** Stage 4 *******/
245 /* H W G CG */
246 b->Args({ 7 * 7, 2, 96});
247}
248
249static void ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark* b)
250{
251 b->ArgNames({"N", "G", "GC"});
252
253 /******** Stage 2 ********/
254 /* H W G CG */
255 b->Args({28 * 28, 2, 58});
256
257 /******** Stage 3 ********/
258 /* H W G CG */
259 b->Args({14 * 14, 2, 116});
260
261 /******** Stage 4 ********/
262 /* H W G CG */
263 b->Args({ 7 * 7, 2, 232});
264}
265
266static void ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark* b)
267{
268 b->ArgNames({"N", "G", "GC"});
269
270 /******** Stage 2 ********/
271 /* H W G CG */
272 b->Args({28 * 28, 2, 88});
273
274 /******** Stage 3 ********/
275 /* H W G CG */
276 b->Args({14 * 14, 2, 176});
277
278 /******** Stage 4 ********/
279 /* H W G CG */
280 b->Args({ 7 * 7, 2, 352});
281}
282
283static void ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark* b)
284{
285 b->ArgNames({"N", "G", "GC"});
286
287 /******** Stage 2 ********/
288 /* H W G CG */
289 b->Args({28 * 28, 2, 122});
290
291 /******** Stage 3 ********/
292 /* H W G CG */
293 b->Args({14 * 14, 2, 244});
294
295 /******** Stage 4 ********/
296 /* H W G CG */
297 b->Args({ 7 * 7, 2, 488});
298}
299
300BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
301BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
302BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
303BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
304BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
305BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
306BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
307BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
308
309BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
310BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
311BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
312BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
313BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
314BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
315BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
316BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
317
318#ifndef XNNPACK_BENCHMARK_NO_MAIN
319BENCHMARK_MAIN();
320#endif