// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE


28static void xnnpack_elu_f32(benchmark::State& state) {
29 const size_t batch_size = state.range(0);
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080030
31 std::random_device random_device;
32 auto rng = std::mt19937(random_device());
33 auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 20.0f), std::ref(rng));
34
Marat Dukhana0129e92021-12-30 15:59:28 -080035 std::vector<float> input(batch_size + XNN_EXTRA_BYTES / sizeof(float));
36 std::vector<float> output(batch_size);
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080037 std::generate(input.begin(), input.end(), std::ref(f32rng));
38 std::fill(output.begin(), output.end(), std::nanf(""));
39
40 xnn_status status = xnn_initialize(nullptr /* allocator */);
41 if (status != xnn_status_success) {
42 state.SkipWithError("failed to initialize XNNPACK");
43 return;
44 }
45
46 xnn_operator_t elu_op = nullptr;
47 status = xnn_create_elu_nc_f32(
Marat Dukhana0129e92021-12-30 15:59:28 -080048 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080049 1.0f /* alpha */, 0 /* flags */, &elu_op);
50 if (status != xnn_status_success || elu_op == nullptr) {
51 state.SkipWithError("failed to create ELU operator");
52 return;
53 }
54
55 status = xnn_setup_elu_nc_f32(
Marat Dukhana0129e92021-12-30 15:59:28 -080056 elu_op, batch_size,
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080057 input.data(), output.data(),
58 nullptr /* thread pool */);
59 if (status != xnn_status_success) {
60 state.SkipWithError("failed to setup ELU operator");
61 return;
62 }
63
64 for (auto _ : state) {
65 status = xnn_run_operator(elu_op, nullptr /* thread pool */);
66 if (status != xnn_status_success) {
67 state.SkipWithError("failed to run ELU operator");
68 return;
69 }
70 }
71
72 status = xnn_delete_operator(elu_op);
73 if (status != xnn_status_success) {
74 state.SkipWithError("failed to delete ELU operator");
75 return;
76 }
77
78 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
79 if (cpu_frequency != 0) {
80 state.counters["cpufreq"] = cpu_frequency;
81 }
82
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080083 state.counters["elements"] =
Marat Dukhana0129e92021-12-30 15:59:28 -080084 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080085
Marat Dukhana0129e92021-12-30 15:59:28 -080086 const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -080087 state.counters["bytes"] =
88 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
89}
90
Marat Dukhan9084fc82021-12-31 10:16:09 -080091#ifndef XNN_NO_QS8_OPERATORS
92static void xnnpack_elu_qs8(benchmark::State& state) {
93 const size_t batch_size = state.range(0);
94
95 std::random_device random_device;
96 auto rng = std::mt19937(random_device());
97 auto i8rng = std::bind(
98 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
99 std::ref(rng));
100
101 std::vector<int8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(int8_t));
102 std::vector<int8_t> output(batch_size);
103 std::generate(input.begin(), input.end(), std::ref(i8rng));
104 std::fill(output.begin(), output.end(), INT8_C(0xA5));
105
106 xnn_status status = xnn_initialize(nullptr /* allocator */);
107 if (status != xnn_status_success) {
108 state.SkipWithError("failed to initialize XNNPACK");
109 return;
110 }
111
112 xnn_operator_t elu_op = nullptr;
113 status = xnn_create_elu_nc_qs8(
114 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
115 1.0f /* alpha */,
116 0 /* input zero point */, 1.0f /* input scale */,
117 0 /* output zero point */, 1.0f /* output scale */,
118 std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
119 0 /* flags */, &elu_op);
120 if (status != xnn_status_success || elu_op == nullptr) {
121 state.SkipWithError("failed to create ELU operator");
122 return;
123 }
124
125 status = xnn_setup_elu_nc_qs8(
126 elu_op, batch_size,
127 input.data(), output.data(),
128 nullptr /* thread pool */);
129 if (status != xnn_status_success) {
130 state.SkipWithError("failed to setup ELU operator");
131 return;
132 }
133
134 for (auto _ : state) {
135 status = xnn_run_operator(elu_op, nullptr /* thread pool */);
136 if (status != xnn_status_success) {
137 state.SkipWithError("failed to run ELU operator");
138 return;
139 }
140 }
141
142 status = xnn_delete_operator(elu_op);
143 if (status != xnn_status_success) {
144 state.SkipWithError("failed to delete ELU operator");
145 return;
146 }
147
148 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
149 if (cpu_frequency != 0) {
150 state.counters["cpufreq"] = cpu_frequency;
151 }
152
153 state.counters["elements"] =
154 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
155
156 const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
157 state.counters["bytes"] =
158 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
159}
160#endif // XNN_NO_QS8_OPERATORS
161
#ifdef BENCHMARK_TENSORFLOW_LITE
// Benchmarks the TensorFlow Lite built-in ELU operator on FLOAT32 tensors.
//
// Builds an in-memory single-operator model whose input (tensor 0) and
// output (tensor 1) are 1-D tensors of state.range(0) elements, runs it on a
// single-threaded interpreter, and reports the "cpufreq" counter (when a
// non-zero frequency is available) plus "elements" and "bytes" rate counters.
// Any TFLite failure stops the benchmark through state.SkipWithError().
static void tflite_elu_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 20.0f), std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_ELU);

  // A single empty buffer is registered with the model.
  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  // Tensor 0 is the input, tensor 1 is the output; both share the same shape.
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_FLOAT32),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("ELU model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Look the input tensor up once and make sure it is usable before writing
  // batch_size elements through the pointer.
  float* const input_data = interpreter->typed_tensor<float>(0);
  if (input_data == nullptr) {
    state.SkipWithError("failed to access TFLite input tensor");
    return;
  }
  std::generate(input_data, input_data + batch_size, std::ref(f32rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(static_cast<uint64_t>(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  // Each iteration reads batch_size floats and writes batch_size floats.
  const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(static_cast<uint64_t>(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  // Destroy the interpreter explicitly, before `builder` (which owns the
  // model bytes) goes out of scope.
  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifdef BENCHMARK_TENSORFLOW_LITE
// Benchmarks the TensorFlow Lite built-in ELU operator on INT8 tensors.
//
// Builds an in-memory single-operator model whose input (tensor 0) and
// output (tensor 1) are 1-D INT8 tensors of state.range(0) elements with
// quantization scale 1.0 and zero point 1, runs it on a single-threaded
// interpreter, and reports the "cpufreq" counter (when a non-zero frequency
// is available) plus "elements" and "bytes" rate counters. Any TFLite
// failure stops the benchmark through state.SkipWithError().
static void tflite_elu_qs8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto i8rng = std::bind(
    std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
    std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_ELU);

  // A single empty buffer is registered with the model.
  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  // Tensor 0 is the input, tensor 1 is the output; both share the same shape
  // and quantization parameters.
  // NOTE(review): the zero point here is 1, while xnnpack_elu_qs8 in this
  // file uses zero point 0 - confirm the difference is intentional.
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f /* scale */}),
                           builder.CreateVector<int64_t>({1 /* zero point */}))),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f /* scale */}),
                           builder.CreateVector<int64_t>({1 /* zero point */}))),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("ELU model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Look the input tensor up once and make sure it is usable before writing
  // batch_size elements through the pointer.
  int8_t* const input_data = interpreter->typed_tensor<int8_t>(0);
  if (input_data == nullptr) {
    state.SkipWithError("failed to access TFLite input tensor");
    return;
  }
  std::generate(input_data, input_data + batch_size, std::ref(i8rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(static_cast<uint64_t>(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  // Each iteration reads batch_size bytes and writes batch_size bytes.
  const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
  state.counters["bytes"] =
    benchmark::Counter(static_cast<uint64_t>(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  // Destroy the interpreter explicitly, before `builder` (which owns the
  // model bytes) goes out of scope.
  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

Marat Dukhana0129e92021-12-30 15:59:28 -0800366BENCHMARK(xnnpack_elu_f32)
367 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
368 ->UseRealTime();
Marat Dukhan9084fc82021-12-31 10:16:09 -0800369#ifndef XNN_NO_QS8_OPERATORS
370 BENCHMARK(xnnpack_elu_qs8)
371 ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
372 ->UseRealTime();
373#endif // XNN_NO_QS8_OPERATORS
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -0800374
375#ifdef BENCHMARK_TENSORFLOW_LITE
Marat Dukhana0129e92021-12-30 15:59:28 -0800376 BENCHMARK(tflite_elu_f32)
377 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
378 ->UseRealTime();
Marat Dukhan9084fc82021-12-31 10:16:09 -0800379 BENCHMARK(tflite_elu_qs8)
380 ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
381 ->UseRealTime();
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -0800382#endif // BENCHMARK_TENSORFLOW_LITE
383
384#ifndef XNNPACK_BENCHMARK_NO_MAIN
385BENCHMARK_MAIN();
386#endif