// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE


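// Benchmarks the XNNPACK F32 HardSwish operator: the operator is created and
// configured once, and each benchmark iteration times a single
// xnn_run_operator call over a batch_size x channels tensor.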
static void xnnpack_hardswish_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

  std::vector<float> input(batch_size * channels);
  std::vector<float> output(batch_size * channels);
  std::generate(input.begin(), input.end(), std::ref(f32rng));
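  // Pre-fill the output with NaNs so any elements the operator fails to write
  // stand out.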
  std::fill(output.begin(), output.end(), std::nanf(""));

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

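  // Create the operator outside the timed loop, so one-time setup cost is
  // excluded from the measurement.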
  xnn_operator_t hardswish_op = nullptr;
  status = xnn_create_hardswish_nc_f32(
    channels, channels /* input stride */, channels /* output stride */,
    0 /* flags */, &hardswish_op);
  if (status != xnn_status_success || hardswish_op == nullptr) {
    state.SkipWithError("failed to create HardSwish operator");
    return;
  }

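  // Bind the operator to the input/output buffers and the batch size.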
  status = xnn_setup_hardswish_nc_f32(
    hardswish_op,
    batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup HardSwish operator");
    return;
  }

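  // Timed section: each benchmark iteration runs the operator once.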
  for (auto _ : state) {
    status = xnn_run_operator(hardswish_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run HardSwish operator");
      return;
    }
  }

  status = xnn_delete_operator(hardswish_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete HardSwish operator");
    return;
  }

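  // Report the effective CPU frequency plus throughput counters: elements
  // processed per second, and bytes per second assuming one read and one
  // write per element.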
  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}

#ifdef BENCHMARK_TENSORFLOW_LITE
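// Benchmarks the TensorFlow Lite HARD_SWISH builtin on the same shapes: a
// single-operator model is assembled in memory as a FlatBuffer, and each
// benchmark iteration times one Interpreter::Invoke call.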
static void tflite_hardswish_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

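  // Build the model: one HARD_SWISH operator connecting a FLOAT32 input
  // tensor to a FLOAT32 output tensor, with a single empty buffer.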
  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
      CreateOperatorCode(builder, tflite::BuiltinOperator_HARD_SWISH);

  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 4> input_shape{{
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(1 /* height */),
    static_cast<int32_t>(1 /* width */),
    static_cast<int32_t>(channels)
  }};
  const std::array<int32_t, 4> output_shape{{
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(1 /* height */),
    static_cast<int32_t>(1 /* width */),
    static_cast<int32_t>(channels)
  }};

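  // Both tensors are shaped [N, 1, 1, C]: a 4-D NHWC layout with unit spatial
  // dimensions, so the element count matches the XNNPACK benchmark above.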
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(input_shape.data(), input_shape.size()),
                         tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(output_shape.data(), output_shape.size()),
                         tflite::TensorType_FLOAT32),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
      builder,
      0 /* opcode_index */,
      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
      builder,
      builder.CreateVector(tensors.data(), tensors.size()),
      builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
      builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
      builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
      TFLITE_SCHEMA_VERSION,
      builder.CreateVector(&operator_code, 1),
      builder.CreateVector(&subgraph, 1),
      builder.CreateString("HardSwish model"),
      builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

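  // Instantiate a single-threaded interpreter over the in-memory model.
  // BuiltinOpResolverWithoutDefaultDelegates keeps the default XNNPACK
  // delegate out of the run, so TFLite's own builtin kernel is benchmarked.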
  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

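  // Populate the input tensor once; the same inputs are reused across
  // iterations.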
  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size * channels,
    std::ref(f32rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifndef XNN_NO_F16_OPERATORS
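// Benchmarks the XNNPACK F16 HardSwish operator. Inputs are generated as F32
// and converted to IEEE half-precision (stored as uint16_t) with
// fp16_ieee_from_fp32_value.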
static void xnnpack_hardswish_f16(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
  auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

  std::vector<uint16_t> input(batch_size * channels);
  std::vector<uint16_t> output(batch_size * channels);
  std::generate(input.begin(), input.end(), std::ref(f16rng));
  // Pre-fill the output with the half-precision NaN bit pattern; converting
  // std::nanf("") to uint16_t would be undefined behavior, not a NaN fill.
  std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t hardswish_op = nullptr;
  status = xnn_create_hardswish_nc_f16(
    channels, channels /* input stride */, channels /* output stride */,
    0 /* flags */, &hardswish_op);
  if (status != xnn_status_success || hardswish_op == nullptr) {
    state.SkipWithError("failed to create HardSwish operator");
    return;
  }

  status = xnn_setup_hardswish_nc_f16(
    hardswish_op,
    batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup HardSwish operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(hardswish_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run HardSwish operator");
      return;
    }
  }

  status = xnn_delete_operator(hardswish_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete HardSwish operator");
    return;
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint16_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}
#endif  // XNN_NO_F16_OPERATORS

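// Sweeps shapes characteristic of image-classification networks: the spatial
// extent shrinks from 224x224 down to 7x7 while the channel count doubles
// from 16 to 512, roughly a MobileNet-style shape progression.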
static void CharacteristicArguments(benchmark::internal::Benchmark* b)
{
  b->ArgNames({"N", "C"});

  int32_t c = 16;
  for (int32_t n = 224; n >= 7; n /= 2) {
    b->Args({n * n, c});
    c *= 2;
  }
}

BENCHMARK(xnnpack_hardswish_f32)->Apply(CharacteristicArguments)->UseRealTime();

#ifdef BENCHMARK_TENSORFLOW_LITE
  BENCHMARK(tflite_hardswish_f32)->Apply(CharacteristicArguments)->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE
#ifndef XNN_NO_F16_OPERATORS
BENCHMARK(xnnpack_hardswish_f16)->Apply(CharacteristicArguments)->UseRealTime();
#endif  // XNN_NO_F16_OPERATORS

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif