blob: 67895d97e499d9407da9438c7d439db866abb9fb [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cmath>
11#include <functional>
12#include <random>
13#include <vector>
14
15#include <xnnpack.h>
16
17#include <benchmark/benchmark.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070018#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070019
20
21static void channel_shuffle_x8(benchmark::State& state, const char* net) {
22 const size_t batch_size = static_cast<size_t>(state.range(0));
23 const size_t groups = static_cast<size_t>(state.range(1));
24 const size_t group_channels = static_cast<size_t>(state.range(2));
25
26 std::random_device random_device;
27 auto rng = std::mt19937(random_device());
28 auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
29
30 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + batch_size * groups * group_channels);
31 std::vector<uint8_t> output(batch_size * groups * group_channels);
32 std::generate(input.begin(), input.end(), std::ref(u8rng));
33
Marat Dukhan04f03be2019-11-19 12:36:47 -080034 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070035 if (status != xnn_status_success) {
36 state.SkipWithError("failed to initialize XNNPACK");
37 return;
38 }
39
40 xnn_operator_t channel_shuffle_op = nullptr;
41 status = xnn_create_channel_shuffle_nc_x8(
42 groups, group_channels,
43 groups * group_channels /* input stride */,
44 groups * group_channels /* output stride */,
45 0 /* flags */, &channel_shuffle_op);
46 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
47 state.SkipWithError("failed to create X8 Channel Shuffle operator");
48 return;
49 }
50
51 status = xnn_setup_channel_shuffle_nc_x8(
52 channel_shuffle_op,
53 batch_size,
54 input.data(), output.data(),
55 nullptr /* thread pool */);
56 if (status != xnn_status_success) {
57 state.SkipWithError("failed to setup X8 Channel Shuffle operator");
58 return;
59 }
60
61 for (auto _ : state) {
62 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
63 if (status != xnn_status_success) {
64 state.SkipWithError("failed to run X8 Channel Shuffle operator");
65 return;
66 }
67 }
68
69 status = xnn_delete_operator(channel_shuffle_op);
70 if (status != xnn_status_success) {
71 state.SkipWithError("failed to delete X8 Channel Shuffle operator");
72 return;
73 }
74
Frank Barchardbb4c18b2019-09-30 11:05:52 -070075 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
76
XNNPACK Teamb455b122019-09-27 18:10:33 -070077 const size_t elements_per_iteration = batch_size * groups * group_channels;
78 state.counters["elements"] =
79 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
80
81 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
82 state.counters["bytes"] =
83 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
84}
85
86static void channel_shuffle_x32(benchmark::State& state, const char* net) {
87 const size_t batch_size = static_cast<size_t>(state.range(0));
88 const size_t groups = static_cast<size_t>(state.range(1));
89 const size_t group_channels = static_cast<size_t>(state.range(2));
90
91 std::random_device random_device;
92 auto rng = std::mt19937(random_device());
93 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
94
95 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + batch_size * groups * group_channels);
96 std::vector<float> output(batch_size * groups * group_channels);
97 std::generate(input.begin(), input.end(), std::ref(f32rng));
98
Marat Dukhan04f03be2019-11-19 12:36:47 -080099 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700100 if (status != xnn_status_success) {
101 state.SkipWithError("failed to initialize XNNPACK");
102 return;
103 }
104
105 xnn_operator_t channel_shuffle_op = nullptr;
106 status = xnn_create_channel_shuffle_nc_x32(
107 groups, group_channels,
108 groups * group_channels /* input stride */,
109 groups * group_channels /* output stride */,
110 0 /* flags */, &channel_shuffle_op);
111 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
112 state.SkipWithError("failed to create X32 Channel Shuffle operator");
113 return;
114 }
115
116 status = xnn_setup_channel_shuffle_nc_x32(
117 channel_shuffle_op,
118 batch_size,
119 input.data(), output.data(),
120 nullptr /* thread pool */);
121 if (status != xnn_status_success) {
122 state.SkipWithError("failed to setup X32 Channel Shuffle operator");
123 return;
124 }
125
126 for (auto _ : state) {
127 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
128 if (status != xnn_status_success) {
129 state.SkipWithError("failed to run X32 Channel Shuffle operator");
130 return;
131 }
132 }
133
134 status = xnn_delete_operator(channel_shuffle_op);
135 if (status != xnn_status_success) {
136 state.SkipWithError("failed to delete X32 Channel Shuffle operator");
137 return;
138 }
139
Frank Barchardbb4c18b2019-09-30 11:05:52 -0700140 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
141
XNNPACK Teamb455b122019-09-27 18:10:33 -0700142 const size_t elements_per_iteration = batch_size * groups * group_channels;
143 state.counters["elements"] =
144 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
145
146 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
147 state.counters["bytes"] =
148 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
149}
150
151static void ShuffleNetV1G2Arguments(benchmark::internal::Benchmark* b)
152{
153 b->ArgNames({"N", "G", "GC"});
154
155 /******** Stage 2 ********/
156 /* H W G CG */
157 b->Args({56 * 56, 2, 25});
158 b->Args({28 * 28, 2, 25});
159
160 /******** Stage 3 ********/
161 /* H W G CG */
162 b->Args({28 * 28, 2, 50});
163 b->Args({14 * 14, 2, 50});
164
165 /******** Stage 4 ********/
166 /* H W G CG */
167 b->Args({14 * 14, 2, 100});
168 b->Args({ 7 * 7, 2, 100});
169}
170
171static void ShuffleNetV1G3Arguments(benchmark::internal::Benchmark* b)
172{
173 b->ArgNames({"N", "G", "GC"});
174
175 /******** Stage 2 *******/
176 /* H W G CG */
177 b->Args({56 * 56, 3, 20});
178 b->Args({28 * 28, 3, 20});
179
180 /******** Stage 3 *******/
181 /* H W G CG */
182 b->Args({28 * 28, 3, 40});
183 b->Args({14 * 14, 3, 40});
184
185 /******** Stage 4 *******/
186 /* H W G CG */
187 b->Args({14 * 14, 3, 80});
188 b->Args({ 7 * 7, 3, 80});
189}
190
191static void ShuffleNetV1G4Arguments(benchmark::internal::Benchmark* b)
192{
193 b->ArgNames({"N", "G", "GC"});
194
195 /******** Stage 2 *******/
196 /* H W G CG */
197 b->Args({56 * 56, 4, 17});
198 b->Args({28 * 28, 4, 17});
199
200 /******** Stage 3 *******/
201 /* H W G CG */
202 b->Args({28 * 28, 4, 34});
203 b->Args({14 * 14, 4, 34});
204
205 /******** Stage 4 *******/
206 /* H W G CG */
207 b->Args({14 * 14, 4, 68});
208 b->Args({ 7 * 7, 4, 68});
209}
210
211static void ShuffleNetV1G8Arguments(benchmark::internal::Benchmark* b)
212{
213 b->ArgNames({"N", "G", "GC"});
214
215 /******** Stage 2 *******/
216 /* H W G CG */
217 b->Args({56 * 56, 8, 12});
218 b->Args({28 * 28, 8, 12});
219
220 /******** Stage 3 *******/
221 /* H W G CG */
222 b->Args({28 * 28, 8, 24});
223 b->Args({14 * 14, 8, 24});
224
225 /******** Stage 4 *******/
226 /* H W G CG */
227 b->Args({14 * 14, 8, 48});
228 b->Args({ 7 * 7, 8, 48});
229}
230
231static void ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark* b)
232{
233 b->ArgNames({"N", "G", "GC"});
234
235 /******** Stage 2 *******/
236 /* H W G CG */
237 b->Args({28 * 28, 2, 24});
238
239 /******** Stage 3 *******/
240 /* H W G CG */
241 b->Args({14 * 14, 2, 48});
242
243 /******** Stage 4 *******/
244 /* H W G CG */
245 b->Args({ 7 * 7, 2, 96});
246}
247
248static void ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark* b)
249{
250 b->ArgNames({"N", "G", "GC"});
251
252 /******** Stage 2 ********/
253 /* H W G CG */
254 b->Args({28 * 28, 2, 58});
255
256 /******** Stage 3 ********/
257 /* H W G CG */
258 b->Args({14 * 14, 2, 116});
259
260 /******** Stage 4 ********/
261 /* H W G CG */
262 b->Args({ 7 * 7, 2, 232});
263}
264
265static void ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark* b)
266{
267 b->ArgNames({"N", "G", "GC"});
268
269 /******** Stage 2 ********/
270 /* H W G CG */
271 b->Args({28 * 28, 2, 88});
272
273 /******** Stage 3 ********/
274 /* H W G CG */
275 b->Args({14 * 14, 2, 176});
276
277 /******** Stage 4 ********/
278 /* H W G CG */
279 b->Args({ 7 * 7, 2, 352});
280}
281
282static void ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark* b)
283{
284 b->ArgNames({"N", "G", "GC"});
285
286 /******** Stage 2 ********/
287 /* H W G CG */
288 b->Args({28 * 28, 2, 122});
289
290 /******** Stage 3 ********/
291 /* H W G CG */
292 b->Args({14 * 14, 2, 244});
293
294 /******** Stage 4 ********/
295 /* H W G CG */
296 b->Args({ 7 * 7, 2, 488});
297}
298
299BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
300BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
301BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
302BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
303BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
304BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
305BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
306BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
307
308BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
309BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
310BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
311BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
312BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
313BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
314BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
315BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
316
317#ifndef XNNPACK_BENCHMARK_NO_MAIN
318BENCHMARK_MAIN();
319#endif