blob: 626d715d86b0d4daa86fe53018617e0399360971 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
Marat Dukhan401d97b2020-12-02 12:32:09 -080010#include <array>
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <cmath>
12#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070013#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <random>
15#include <vector>
16
17#include <xnnpack.h>
18
19#include <benchmark/benchmark.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070020#include "bench/utils.h"
Marat Dukhanc3b9e862019-11-17 13:18:54 -080021#ifdef BENCHMARK_TENSORFLOW_LITE
22#include "flatbuffers/include/flatbuffers/flatbuffers.h"
23#include "tensorflow/lite/interpreter.h"
24#include "tensorflow/lite/kernels/register.h"
25#include "tensorflow/lite/model.h"
26#include "tensorflow/lite/schema/schema_generated.h"
27#include "tensorflow/lite/version.h"
28#endif // BENCHMARK_TENSORFLOW_LITE
XNNPACK Teamb455b122019-09-27 18:10:33 -070029
Marat Dukhan401d97b2020-12-02 12:32:09 -080030
Marat Dukhanc3b9e862019-11-17 13:18:54 -080031static void xnnpack_sigmoid_f32(benchmark::State& state) {
32 const size_t batch_size = state.range(0);
Marat Dukhan346a9e52019-11-15 09:06:30 -080033
34 std::random_device random_device;
35 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070036 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
Marat Dukhan346a9e52019-11-15 09:06:30 -080037
Marat Dukhana0129e92021-12-30 15:59:28 -080038 std::vector<float> input(batch_size + XNN_EXTRA_BYTES / sizeof(float));
39 std::vector<float> output(batch_size);
Marat Dukhan346a9e52019-11-15 09:06:30 -080040 std::generate(input.begin(), input.end(), std::ref(f32rng));
41 std::fill(output.begin(), output.end(), std::nanf(""));
42
Marat Dukhan04f03be2019-11-19 12:36:47 -080043 xnn_status status = xnn_initialize(nullptr /* allocator */);
Marat Dukhan346a9e52019-11-15 09:06:30 -080044 if (status != xnn_status_success) {
45 state.SkipWithError("failed to initialize XNNPACK");
46 return;
47 }
48
49 xnn_operator_t sigmoid_op = nullptr;
50 status = xnn_create_sigmoid_nc_f32(
Marat Dukhana0129e92021-12-30 15:59:28 -080051 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
Marat Dukhan346a9e52019-11-15 09:06:30 -080052 0 /* flags */, &sigmoid_op);
53 if (status != xnn_status_success || sigmoid_op == nullptr) {
54 state.SkipWithError("failed to create Sigmoid operator");
55 return;
56 }
57
58 status = xnn_setup_sigmoid_nc_f32(
Marat Dukhana0129e92021-12-30 15:59:28 -080059 sigmoid_op, batch_size,
Marat Dukhan346a9e52019-11-15 09:06:30 -080060 input.data(), output.data(),
61 nullptr /* thread pool */);
62 if (status != xnn_status_success) {
63 state.SkipWithError("failed to setup Sigmoid operator");
64 return;
65 }
66
67 for (auto _ : state) {
68 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
69 if (status != xnn_status_success) {
70 state.SkipWithError("failed to run Sigmoid operator");
71 return;
72 }
73 }
74
75 status = xnn_delete_operator(sigmoid_op);
76 if (status != xnn_status_success) {
77 state.SkipWithError("failed to delete Sigmoid operator");
78 return;
79 }
80
Marat Dukhan401d97b2020-12-02 12:32:09 -080081 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
82 if (cpu_frequency != 0) {
83 state.counters["cpufreq"] = cpu_frequency;
84 }
Marat Dukhan346a9e52019-11-15 09:06:30 -080085
Marat Dukhan346a9e52019-11-15 09:06:30 -080086 state.counters["elements"] =
Marat Dukhana0129e92021-12-30 15:59:28 -080087 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
Marat Dukhan346a9e52019-11-15 09:06:30 -080088
Marat Dukhana0129e92021-12-30 15:59:28 -080089 const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
Marat Dukhan346a9e52019-11-15 09:06:30 -080090 state.counters["bytes"] =
91 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
92}
93
Marat Dukhan9084fc82021-12-31 10:16:09 -080094#ifndef XNN_NO_QS8_OPERATORS
95static void xnnpack_sigmoid_qs8(benchmark::State& state) {
96 const size_t batch_size = state.range(0);
97
98 std::random_device random_device;
99 auto rng = std::mt19937(random_device());
100 auto i8rng = std::bind(
101 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
102 std::ref(rng));
103
104 std::vector<int8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(int8_t));
105 std::vector<int8_t> output(batch_size);
106 std::generate(input.begin(), input.end(), std::ref(i8rng));
107 std::fill(output.begin(), output.end(), INT8_C(0xA5));
108
109 xnn_status status = xnn_initialize(nullptr /* allocator */);
110 if (status != xnn_status_success) {
111 state.SkipWithError("failed to initialize XNNPACK");
112 return;
113 }
114
115 xnn_operator_t sigmoid_op = nullptr;
116 status = xnn_create_sigmoid_nc_qs8(
117 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
118 1 /* input zero point */, 1.0f /* input scale */,
119 -128 /* output zero point */, 1.0f / 256.0f /* output scale */,
120 std::numeric_limits<int8_t>::min() /* output min */, std::numeric_limits<int8_t>::max() /* output max */,
121 0 /* flags */, &sigmoid_op);
122 if (status != xnn_status_success || sigmoid_op == nullptr) {
123 state.SkipWithError("failed to create Sigmoid operator");
124 return;
125 }
126
127 status = xnn_setup_sigmoid_nc_qs8(
128 sigmoid_op, batch_size,
129 input.data(), output.data(),
130 nullptr /* thread pool */);
131 if (status != xnn_status_success) {
132 state.SkipWithError("failed to setup Sigmoid operator");
133 return;
134 }
135
136 for (auto _ : state) {
137 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
138 if (status != xnn_status_success) {
139 state.SkipWithError("failed to run Sigmoid operator");
140 return;
141 }
142 }
143
144 status = xnn_delete_operator(sigmoid_op);
145 if (status != xnn_status_success) {
146 state.SkipWithError("failed to delete Sigmoid operator");
147 return;
148 }
149
150 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
151 if (cpu_frequency != 0) {
152 state.counters["cpufreq"] = cpu_frequency;
153 }
154
155 state.counters["elements"] =
156 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
157
158 const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
159 state.counters["bytes"] =
160 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
161}
162#endif // XNN_NO_QS8_OPERATORS
163
Marat Dukhana0129e92021-12-30 15:59:28 -0800164#ifndef XNN_NO_QU8_OPERATORS
165static void xnnpack_sigmoid_qu8(benchmark::State& state) {
166 const size_t batch_size = state.range(0);
167
168 std::random_device random_device;
169 auto rng = std::mt19937(random_device());
170 auto u8rng = std::bind(
171 std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
172
173 std::vector<uint8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(uint8_t));
174 std::vector<uint8_t> output(batch_size);
175 std::generate(input.begin(), input.end(), std::ref(u8rng));
Marat Dukhan9084fc82021-12-31 10:16:09 -0800176 std::fill(output.begin(), output.end(), UINT8_C(0xA5));
Marat Dukhana0129e92021-12-30 15:59:28 -0800177
178 xnn_status status = xnn_initialize(nullptr /* allocator */);
179 if (status != xnn_status_success) {
180 state.SkipWithError("failed to initialize XNNPACK");
181 return;
182 }
183
184 xnn_operator_t sigmoid_op = nullptr;
185 status = xnn_create_sigmoid_nc_qu8(
186 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
Marat Dukhan9084fc82021-12-31 10:16:09 -0800187 128 /* input zero point */, 1.0f /* input scale */,
Marat Dukhana0129e92021-12-30 15:59:28 -0800188 0 /* output zero point */, 1.0f / 256.0f /* output scale */,
Marat Dukhan9084fc82021-12-31 10:16:09 -0800189 std::numeric_limits<uint8_t>::min() /* output min */, std::numeric_limits<uint8_t>::max() /* output max */,
Marat Dukhana0129e92021-12-30 15:59:28 -0800190 0 /* flags */, &sigmoid_op);
191 if (status != xnn_status_success || sigmoid_op == nullptr) {
192 state.SkipWithError("failed to create Sigmoid operator");
193 return;
194 }
195
196 status = xnn_setup_sigmoid_nc_qu8(
197 sigmoid_op, batch_size,
198 input.data(), output.data(),
199 nullptr /* thread pool */);
200 if (status != xnn_status_success) {
201 state.SkipWithError("failed to setup Sigmoid operator");
202 return;
203 }
204
205 for (auto _ : state) {
206 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
207 if (status != xnn_status_success) {
208 state.SkipWithError("failed to run Sigmoid operator");
209 return;
210 }
211 }
212
213 status = xnn_delete_operator(sigmoid_op);
214 if (status != xnn_status_success) {
215 state.SkipWithError("failed to delete Sigmoid operator");
216 return;
217 }
218
219 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
220 if (cpu_frequency != 0) {
221 state.counters["cpufreq"] = cpu_frequency;
222 }
223
224 state.counters["elements"] =
225 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
226
227 const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
228 state.counters["bytes"] =
229 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
230}
231#endif // XNN_NO_QU8_OPERATORS
232
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800233#ifdef BENCHMARK_TENSORFLOW_LITE
234static void tflite_sigmoid_f32(benchmark::State& state) {
235 const size_t batch_size = state.range(0);
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800236
237 std::random_device random_device;
238 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700239 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800240
241 flatbuffers::FlatBufferBuilder builder;
Marat Dukhan401d97b2020-12-02 12:32:09 -0800242 const flatbuffers::Offset<tflite::OperatorCode> operator_code =
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800243 CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);
244
Marat Dukhan401d97b2020-12-02 12:32:09 -0800245 const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800246 tflite::CreateBuffer(builder, builder.CreateVector({})),
Marat Dukhan401d97b2020-12-02 12:32:09 -0800247 }};
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800248
Marat Dukhana0129e92021-12-30 15:59:28 -0800249 const std::array<int32_t, 1> shape{{
250 static_cast<int32_t>(batch_size)
Marat Dukhan401d97b2020-12-02 12:32:09 -0800251 }};
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800252
Marat Dukhan401d97b2020-12-02 12:32:09 -0800253 const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800254 tflite::CreateTensor(builder,
Marat Dukhana0129e92021-12-30 15:59:28 -0800255 builder.CreateVector<int32_t>(shape.data(), shape.size()),
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800256 tflite::TensorType_FLOAT32),
257 tflite::CreateTensor(builder,
Marat Dukhana0129e92021-12-30 15:59:28 -0800258 builder.CreateVector<int32_t>(shape.data(), shape.size()),
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800259 tflite::TensorType_FLOAT32),
Marat Dukhan401d97b2020-12-02 12:32:09 -0800260 }};
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800261
Marat Dukhan401d97b2020-12-02 12:32:09 -0800262 const std::array<int32_t, 1> op_inputs{{ 0 }};
263 const std::array<int32_t, 1> op_outputs{{ 1 }};
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800264 flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
265 builder,
266 0 /* opcode_index */,
Marat Dukhan401d97b2020-12-02 12:32:09 -0800267 builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
268 builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800269
Marat Dukhan401d97b2020-12-02 12:32:09 -0800270 const std::array<int32_t, 1> graph_inputs{{ 0 }};
271 const std::array<int32_t, 1> graph_outputs{{ 1 }};
272 const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800273 builder,
Marat Dukhan401d97b2020-12-02 12:32:09 -0800274 builder.CreateVector(tensors.data(), tensors.size()),
275 builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
276 builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800277 builder.CreateVector(&op, 1));
278
Marat Dukhan401d97b2020-12-02 12:32:09 -0800279 const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800280 TFLITE_SCHEMA_VERSION,
281 builder.CreateVector(&operator_code, 1),
282 builder.CreateVector(&subgraph, 1),
Marat Dukhan401d97b2020-12-02 12:32:09 -0800283 builder.CreateString("Sigmoid model"),
284 builder.CreateVector(buffers.data(), buffers.size()));
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800285
286 builder.Finish(model_buffer);
287
288 const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
Chao Meif9fdaa72021-05-18 23:04:34 -0700289 tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800290 tflite::InterpreterBuilder interpreterBuilder(model, resolver);
291 std::unique_ptr<tflite::Interpreter> interpreter;
Marat Dukhana0129e92021-12-30 15:59:28 -0800292 if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800293 state.SkipWithError("failed to create TFLite interpreter");
294 return;
295 }
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800296 interpreter->SetNumThreads(1);
297
298 if (interpreter->AllocateTensors() != kTfLiteOk) {
299 state.SkipWithError("failed to allocate tensors");
300 return;
301 }
302
303 std::generate(
304 interpreter->typed_tensor<float>(0),
Marat Dukhana0129e92021-12-30 15:59:28 -0800305 interpreter->typed_tensor<float>(0) + batch_size,
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800306 std::ref(f32rng));
307
308 for (auto _ : state) {
309 if (interpreter->Invoke() != kTfLiteOk) {
310 state.SkipWithError("failed to invoke TFLite interpreter");
311 return;
312 }
313 }
314
Marat Dukhan401d97b2020-12-02 12:32:09 -0800315 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
316 if (cpu_frequency != 0) {
317 state.counters["cpufreq"] = cpu_frequency;
318 }
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800319
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800320 state.counters["elements"] =
Marat Dukhana0129e92021-12-30 15:59:28 -0800321 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800322
Marat Dukhana0129e92021-12-30 15:59:28 -0800323 const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800324 state.counters["bytes"] =
325 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
326
327 interpreter.reset();
328}
Marat Dukhan9084fc82021-12-31 10:16:09 -0800329
330static void tflite_sigmoid_qs8(benchmark::State& state) {
331 const size_t batch_size = state.range(0);
332
333 std::random_device random_device;
334 auto rng = std::mt19937(random_device());
335 auto i8rng = std::bind(
336 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
337 std::ref(rng));
338
339 flatbuffers::FlatBufferBuilder builder;
340 const flatbuffers::Offset<tflite::OperatorCode> operator_code =
341 CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);
342
343 const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
344 tflite::CreateBuffer(builder, builder.CreateVector({})),
345 }};
346
347 const std::array<int32_t, 1> shape{{
348 static_cast<int32_t>(batch_size)
349 }};
350
351 const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
352 tflite::CreateTensor(builder,
353 builder.CreateVector<int32_t>(shape.data(), shape.size()),
354 tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
355 tflite::CreateQuantizationParameters(builder,
356 0 /*min*/, 0 /*max*/,
357 builder.CreateVector<float>({1.0f /* scale */}),
358 builder.CreateVector<int64_t>({1 /* zero point */}))),
359 tflite::CreateTensor(builder,
360 builder.CreateVector<int32_t>(shape.data(), shape.size()),
361 tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
362 tflite::CreateQuantizationParameters(builder,
363 0 /*min*/, 0 /*max*/,
364 builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
365 builder.CreateVector<int64_t>({-128 /* zero point */}))),
366 }};
367
368 const std::array<int32_t, 1> op_inputs{{ 0 }};
369 const std::array<int32_t, 1> op_outputs{{ 1 }};
370 flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
371 builder,
372 0 /* opcode_index */,
373 builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
374 builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));
375
376 const std::array<int32_t, 1> graph_inputs{{ 0 }};
377 const std::array<int32_t, 1> graph_outputs{{ 1 }};
378 const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
379 builder,
380 builder.CreateVector(tensors.data(), tensors.size()),
381 builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
382 builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
383 builder.CreateVector(&op, 1));
384
385 const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
386 TFLITE_SCHEMA_VERSION,
387 builder.CreateVector(&operator_code, 1),
388 builder.CreateVector(&subgraph, 1),
389 builder.CreateString("Sigmoid model"),
390 builder.CreateVector(buffers.data(), buffers.size()));
391
392 builder.Finish(model_buffer);
393
394 const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
395 tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
396 tflite::InterpreterBuilder interpreterBuilder(model, resolver);
397 std::unique_ptr<tflite::Interpreter> interpreter;
398 if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
399 state.SkipWithError("failed to create TFLite interpreter");
400 return;
401 }
402 interpreter->SetNumThreads(1);
403
404 if (interpreter->AllocateTensors() != kTfLiteOk) {
405 state.SkipWithError("failed to allocate tensors");
406 return;
407 }
408
409 std::generate(
410 interpreter->typed_tensor<int8_t>(0),
411 interpreter->typed_tensor<int8_t>(0) + batch_size,
412 std::ref(i8rng));
413
414 for (auto _ : state) {
415 if (interpreter->Invoke() != kTfLiteOk) {
416 state.SkipWithError("failed to invoke TFLite interpreter");
417 return;
418 }
419 }
420
421 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
422 if (cpu_frequency != 0) {
423 state.counters["cpufreq"] = cpu_frequency;
424 }
425
426 state.counters["elements"] =
427 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
428
429 const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
430 state.counters["bytes"] =
431 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
432
433 interpreter.reset();
434}
435
436static void tflite_sigmoid_qu8(benchmark::State& state) {
437 const size_t batch_size = state.range(0);
438
439 std::random_device random_device;
440 auto rng = std::mt19937(random_device());
441 auto u8rng = std::bind(
442 std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()),
443 std::ref(rng));
444
445 flatbuffers::FlatBufferBuilder builder;
446 const flatbuffers::Offset<tflite::OperatorCode> operator_code =
447 CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);
448
449 const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
450 tflite::CreateBuffer(builder, builder.CreateVector({})),
451 }};
452
453 const std::array<int32_t, 1> shape{{
454 static_cast<int32_t>(batch_size)
455 }};
456
457 const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
458 tflite::CreateTensor(builder,
459 builder.CreateVector<int32_t>(shape.data(), shape.size()),
460 tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
461 tflite::CreateQuantizationParameters(builder,
462 0 /*min*/, 0 /*max*/,
463 builder.CreateVector<float>({1.0f /* scale */}),
464 builder.CreateVector<int64_t>({128 /* zero point */}))),
465 tflite::CreateTensor(builder,
466 builder.CreateVector<int32_t>(shape.data(), shape.size()),
467 tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
468 tflite::CreateQuantizationParameters(builder,
469 0 /*min*/, 0 /*max*/,
470 builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
471 builder.CreateVector<int64_t>({0 /* zero point */}))),
472 }};
473
474 const std::array<int32_t, 1> op_inputs{{ 0 }};
475 const std::array<int32_t, 1> op_outputs{{ 1 }};
476 flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
477 builder,
478 0 /* opcode_index */,
479 builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
480 builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));
481
482 const std::array<int32_t, 1> graph_inputs{{ 0 }};
483 const std::array<int32_t, 1> graph_outputs{{ 1 }};
484 const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
485 builder,
486 builder.CreateVector(tensors.data(), tensors.size()),
487 builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
488 builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
489 builder.CreateVector(&op, 1));
490
491 const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
492 TFLITE_SCHEMA_VERSION,
493 builder.CreateVector(&operator_code, 1),
494 builder.CreateVector(&subgraph, 1),
495 builder.CreateString("Sigmoid model"),
496 builder.CreateVector(buffers.data(), buffers.size()));
497
498 builder.Finish(model_buffer);
499
500 const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
501 tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
502 tflite::InterpreterBuilder interpreterBuilder(model, resolver);
503 std::unique_ptr<tflite::Interpreter> interpreter;
504 if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
505 state.SkipWithError("failed to create TFLite interpreter");
506 return;
507 }
508 interpreter->SetNumThreads(1);
509
510 if (interpreter->AllocateTensors() != kTfLiteOk) {
511 state.SkipWithError("failed to allocate tensors");
512 return;
513 }
514
515 std::generate(
516 interpreter->typed_tensor<uint8_t>(0),
517 interpreter->typed_tensor<uint8_t>(0) + batch_size,
518 std::ref(u8rng));
519
520 for (auto _ : state) {
521 if (interpreter->Invoke() != kTfLiteOk) {
522 state.SkipWithError("failed to invoke TFLite interpreter");
523 return;
524 }
525 }
526
527 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
528 if (cpu_frequency != 0) {
529 state.counters["cpufreq"] = cpu_frequency;
530 }
531
532 state.counters["elements"] =
533 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
534
535 const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
536 state.counters["bytes"] =
537 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
538
539 interpreter.reset();
540}
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800541#endif // BENCHMARK_TENSORFLOW_LITE
542
Marat Dukhana0129e92021-12-30 15:59:28 -0800543BENCHMARK(xnnpack_sigmoid_f32)
544 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
545 ->UseRealTime();
Marat Dukhan9084fc82021-12-31 10:16:09 -0800546#ifndef XNN_NO_QS8_OPERATORS
547 BENCHMARK(xnnpack_sigmoid_qs8)
548 ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
549 ->UseRealTime();
550#endif // XNN_NO_QS8_OPERATORS
Chao Meic6640272020-07-23 09:35:11 -0700551#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhana0129e92021-12-30 15:59:28 -0800552 BENCHMARK(xnnpack_sigmoid_qu8)
553 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
554 ->UseRealTime();
Chao Meic6640272020-07-23 09:35:11 -0700555#endif // XNN_NO_QU8_OPERATORS
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800556
557#ifdef BENCHMARK_TENSORFLOW_LITE
Marat Dukhana0129e92021-12-30 15:59:28 -0800558 BENCHMARK(tflite_sigmoid_f32)
559 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
560 ->UseRealTime();
Marat Dukhan9084fc82021-12-31 10:16:09 -0800561 BENCHMARK(tflite_sigmoid_qs8)
562 ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
563 ->UseRealTime();
564 BENCHMARK(tflite_sigmoid_qu8)
565 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
566 ->UseRealTime();
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800567#endif // BENCHMARK_TENSORFLOW_LITE
XNNPACK Teamb455b122019-09-27 18:10:33 -0700568
569#ifndef XNNPACK_BENCHMARK_NO_MAIN
570BENCHMARK_MAIN();
571#endif